//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>

using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

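/// Check that each outgoing argument that is passed in a callee-saved register
/// is the same value the caller received in that register, i.e. a CopyFromReg
/// of the corresponding function live-in. Such arguments do not need to be
/// saved and restored around the call, which matters for tail-call
/// eligibility.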
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    unsigned Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
                                                     unsigned ArgIdx) {
  IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = CS->getParamAlignment(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
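///
/// For example (illustrative sketch, not code from this file), a target
/// expanding an unsupported f128 multiply could emit a call to the __multf3
/// libcall like so:
///   SDValue Ops[2] = {LHS, RHS};
///   return makeLibCall(DAG, RTLIB::MUL_F128, MVT::f128, Ops,
///                      /*isSigned=*/false, dl).first;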
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops, bool isSigned,
                            const SDLoc &dl, bool doesNotReturn,
                            bool isReturnValueUsed) const {
  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (SDValue Op : Ops) {
    Entry.Node = Op;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(doesNotReturn)
      .setDiscardResult(!isReturnValueUsed)
      .setSExtResult(signExtend)
      .setZExtResult(!signExtend);
  return LowerCallTo(CLI);
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
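///
/// For example, an f32 SETONE needs two libcalls (with the default libcall
/// names): the results of __ltsf2 and __gtsf2 are each compared against zero
/// and the two setcc results are OR'd together.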
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl) const {
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETO:
    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
          (VT == MVT::f64) ? RTLIB::O_F64 :
          (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = SETOLT | SETOGT
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
                       dl).first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC)
    CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);

  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
    SDValue Tmp = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(CCCode));
    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false /*sign irrelevant*/,
                         dl).first;
    NewLHS = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
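///
/// For example, if Op is (or X, 0xFF) but only the low four bits of the
/// result are ever demanded, the constant is shrunk and Op becomes
/// (or X, 0xF).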
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
  SelectionDAG &DAG = TLO.DAG;
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C)
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(Demanded)) {
      EVT VT = Op.getValueType();
      SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
      SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
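///
/// For example, if only the low 8 bits of an i64 add are demanded and i32 is
/// the smallest power-of-2 width with free casts in both directions, the add
/// is rewritten as (i64 any_extend (i32 add (trunc X), (trunc Y))).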
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool
TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
                                     const APInt &Demanded,
                                     DAGCombinerInfo &DCI,
                                     TargetLoweringOpt &TLO) const {
  SDValue Op = User->getOperand(OpIdx);
  KnownBits Known;

  if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
    return false;

  // Old will not always be the same as Op. For example:
  //
  // Demanded = 0xffffff
  // Op = i64 truncate (i32 and x, 0xffffff)
  // In this case simplify demand bits will want to replace the 'and' node
  // with the value 'x', which will give us:
  // Old = i32 and x, 0xffffff
  // New = x
  if (TLO.Old.hasOneUse()) {
    // For the one use case, we just commit the change.
    DCI.CommitTargetLoweringOpt(TLO);
    return true;
  }

  // If Old has more than one use then it must be Op, because the
  // AssumeSingleUse flag is not propagated to recursive calls of
  // SimplifyDemandedBits, so the only node with multiple use that
  // it will attempt to combine will be Op.
  assert(TLO.Old == Op);

  SmallVector<SDValue, 4> NewOps;
  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
    if (i == OpIdx) {
      NewOps.push_back(TLO.New);
      continue;
    }
    NewOps.push_back(User->getOperand(i));
  }
  User = TLO.DAG.UpdateNodeOperands(User, NewOps);
  // Op has fewer users now, so we may be able to perform additional combines
  // with it.
  DCI.AddToWorklist(Op.getNode());
  // User's operands have been updated, so we may be able to do new combines
  // with it.
  DCI.AddToWorklist(User);
  return true;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
  if (Simplified)
    DCI.CommitTargetLoweringOpt(TLO);
  return Simplified;
}

/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the DemandedMask.
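///
/// For example, if DemandedMask is 1 and Op is (sra X, C), Op is replaced by
/// (srl X, C), since only the low bit of the result is used.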
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                          const APInt &DemandedMask,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  unsigned BitWidth = DemandedMask.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");
  APInt NewMask = DemandedMask;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Don't know anything.
  Known = KnownBits(BitWidth);

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
    Known.Zero = ~Known.One;
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      TLO.DAG.computeKnownBits(Op, Known, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the NewMask to all bits.
    NewMask = APInt::getAllOnesValue(BitWidth);
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (!Op.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    return false;
  } else if (Depth == 6) { // Limit search depth.
    return false;
  }

  KnownBits Known2, KnownOut;
  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every constant vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (SDValue SrcOp : Op->ops()) {
      if (!isa<ConstantSDNode>(SrcOp)) {
        // We can only handle all constant values - bail out with no known bits.
        Known = KnownBits(BitWidth);
        return false;
      }
      Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
      Known2.Zero = ~Known2.One;

      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
      if (Known2.One.getBitWidth() != BitWidth) {
        assert(Known2.getBitWidth() > BitWidth &&
               "Expected BUILD_VECTOR implicit truncation");
        Known2 = Known2.trunc(BitWidth);
      }

      // Known bits are the values that are shared by every element.
      // TODO: support per-element known bits.
      Known.One &= Known2.One;
      Known.Zero &= Known2.Zero;
    }
    return false; // Don't fall through, will infinitely loop.
  case ISD::AND:
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue Op0 = Op.getOperand(0);
      KnownBits LHSKnown;
      // Do not increment Depth here; that can cause an infinite loop.
      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
                                      Op.getOperand(1));
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
                             Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (NewMask.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;
    break;
  case ISD::OR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
                             Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (NewMask.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  case ISD::XOR: {
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (NewMask.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (NewMask.isSubsetOf(Known.Zero|Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
                                               Op.getOperand(0),
                                               Op.getOperand(1)));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);

    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
      // If one side is a constant, and all of the known set bits on the other
      // side are also set in the constant, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC = TLO.DAG.getConstant(~C->getAPIntValue() & NewMask,
                                           dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                 Op.getOperand(0), ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue()) {
        if (NewMask.isSubsetOf(C->getAPIntValue())) {
          // We're flipping all demanded bits. Flip the undemanded bits too.
          SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
          return TLO.CombineTo(Op, New);
        }
        // If we can't turn this into a 'not', try to shrink the constant.
        if (ShrinkDemandedConstant(Op, NewMask, TLO))
          return true;
      }
    }

    Known = std::move(KnownOut);
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(VT) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL:
    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
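      // e.g. with ShAmt = 3, (shl (srl X, 8), 3) --> (srl X, 5), provided the
      // low 3 bits of the result are not demanded.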
      if (InOp.getOpcode() == ISD::SRL) {
        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
          if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SHL;
              int Diff = ShAmt-C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SRL;
              }

              SDValue NewSA =
                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                       InOp.getOperand(0),
                                                       NewSA));
            }
          }
        }
      }

      if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = InOp.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
            TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                            TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return
            TLO.CombineTo(Op,
                          TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }
        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
            if (InnerShAmt < ShAmt &&
                InnerShAmt < InnerBits &&
                NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
                NewMask.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
                                    Op.getOperand(1).getValueType());
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
                                                       NewExt, NewSA));
            }
          }
        }
      }

      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (NewMask << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
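      // e.g. with ShAmt = 3, (srl (shl X, 8), 3) --> (shl X, 5), provided the
      // high 3 bits of the result are not demanded.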
      if (InOp.getOpcode() == ISD::SHL) {
        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
          if (ShAmt &&
              (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SRL;
              int Diff = ShAmt-C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SHL;
              }

              SDValue NewSA =
                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                       InOp.getOperand(0),
                                                       NewSA));
            }
          }
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      Known.Zero.setHighBits(ShAmt); // High bits known zero.
    }
    break;
  case ISD::SRA:
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (NewMask.isOneValue())
      return TLO.CombineTo(Op,
                           TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
                                           Op.getOperand(1)));

    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (NewMask << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (NewMask.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
                               Depth+1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          NewMask.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
                                             Op.getOperand(1), Flags));
      }

      int Log2 = NewMask.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA =
          TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
                              Op.getOperand(1).getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                 Op.getOperand(0), NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);
    }
    break;
  case ISD::SIGN_EXTEND_INREG: {
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
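    // e.g. for i32, if only bit 31 of (sext_inreg X, i8) is demanded, we can
    // emit (shl X, 24), which moves bit 7 of X into the sign-bit position.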
990 if (NewMask.isSignMask()) {
991 SDValue InOp = Op.getOperand(0);
992 bool AlreadySignExtended =
993 TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
994 // However if the input is already sign extended we expect the sign
995 // extension to be dropped altogether later and do not simplify.
996 if (!AlreadySignExtended) {
997 // Compute the correct shift amount type, which must be getShiftAmountTy
998 // for scalar types after legalization.
999 EVT ShiftAmtTy = VT;
1000 if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1001 ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1003 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
1004 ShiftAmtTy);
1005 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
1006 ShiftAmt));
1010 // If none of the extended bits are demanded, eliminate the sextinreg.
1011 if (NewMask.getActiveBits() <= ExVTBits)
1012 return TLO.CombineTo(Op, Op.getOperand(0));
1014 APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
1016 // Since the sign extended bits are demanded, we know that the sign
1017 // bit is demanded.
1018 InputDemandedBits.setBit(ExVTBits - 1);
1020 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
1021 Known, TLO, Depth+1))
1022 return true;
1023 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1025 // If the sign bit of the input is known set or clear, then we know the
1026 // top bits of the result.
1028 // If the input sign bit is known zero, convert this into a zero extension.
1029 if (Known.Zero[ExVTBits - 1])
1030 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
1031 Op.getOperand(0), dl, ExVT.getScalarType()));
1033 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1034 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1035 Known.One.setBitsFrom(ExVTBits);
1036 Known.Zero &= Mask;
1037 } else { // Input sign bit unknown
1038 Known.Zero &= Mask;
1039 Known.One &= Mask;
1041 break;
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND: {
    unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewMask.getActiveBits() <= OperandBitWidth)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               Op.getOperand(0)));

    APInt InMask = NewMask.trunc(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    Known = Known.zext(BitWidth);
    Known.Zero.setBitsFrom(OperandBitWidth);
    break;
  }
  case ISD::SIGN_EXTEND: {
    unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewMask.getActiveBits() <= InBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               Op.getOperand(0)));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    APInt InDemandedBits = NewMask.trunc(InBits);
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
                             Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
                                               Op.getOperand(0)));
    break;
  }
1107 case ISD::ANY_EXTEND: {
1108 unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
1109 APInt InMask = NewMask.trunc(OperandBitWidth);
1110 if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
1111 return true;
1112 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1113 Known = Known.zext(BitWidth);
1114 break;
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
    APInt TruncMask = NewMask.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
      return true;
    Known = Known.trunc(BitWidth);

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).getNode()->hasOneUse()) {
      SDValue In = Op.getOperand(0);
      switch (In.getOpcode()) {
      default: break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
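        // e.g. (i32 (truncate (srl (i64 X), 16))) can become
        // (srl (i32 (truncate X)), 16) when result bits 16-31 are not demanded.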
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;
        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
        if (!ShAmt)
          break;
        SDValue Shift = In.getOperand(1);
        if (TLO.LegalTypes()) {
          uint64_t ShVal = ShAmt->getZExtValue();
          Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
        }

        if (ShAmt->getZExtValue() < BitWidth) {
          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
                                                 OperandBitWidth - BitWidth);
          HighBits.lshrInPlace(ShAmt->getZExtValue());
          HighBits = HighBits.trunc(BitWidth);

          if (!(HighBits & NewMask)) {
            // None of the shifted in bits are needed. Add a truncate of the
            // shift input, then shift it.
            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
                                               In.getOperand(0));
            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
                                                     Shift));
          }
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
1169 case ISD::AssertZext: {
1170 // AssertZext demands all of the high bits, plus any of the low bits
1171 // demanded by its users.
1172 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1173 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1174 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
1175 Known, TLO, Depth+1))
1176 return true;
1177 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1179 Known.Zero |= ~InMask;
1180 break;
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
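    // e.g. a sign-bit test of (i64 (bitcast f64 X)) can become
    // (shl (fgetsign X), 63); the SHL is expected to fold away.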
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }
    // If bitcast from a vector and the mask covers entire elements, see if we
    // can use SimplifyDemandedVectorElts.
    // TODO - bigendian once we have test coverage.
    // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
    if (SrcVT.isVector() && NumSrcEltBits > 1 &&
        (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
        DemandedSubElts = APInt::getNullValue(Scale);
        for (unsigned i = 0; i != Scale; ++i) {
          unsigned Offset = i * NumSrcEltBits;
          APInt Sub = NewMask.extractBits(NumSrcEltBits, Offset);
          if (Sub.isAllOnesValue())
            DemandedSubElts.setBit(i);
          else if (!Sub.isNullValue())
            return false;
        }
        return true;
      };

      APInt DemandedSubElts;
      if (GetDemandedSubMask(DemandedSubElts)) {
        unsigned NumSrcElts = SrcVT.getVectorNumElements();
        APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);

        APInt KnownUndef, KnownZero;
        if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      TLO.DAG.computeKnownBits(Op, Known, Depth);
      return false;
    }
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    unsigned NewMaskLZ = NewMask.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
    if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
      SDNodeFlags Flags = Op.getNode()->getFlags();
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
                                        Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
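    // e.g. if only the low 8 bits are demanded, (add X, 255) becomes
    // (add X, -1), since the two constants agree in every demanded bit.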
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
    if (C && !C->isAllOnesValue() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // We can't guarantee that the new math op doesn't wrap, so explicitly
      // clear those flags to prevent folding with a potential existing node
      // that has those flags set.
      SDNodeFlags Flags;
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
  default:
    // Just use computeKnownBits to compute output bits.
    TLO.DAG.computeKnownBits(Op, Known, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNodeIterator I = SDNodeIterator::begin(N),
                        E = SDNodeIterator::end(N); I != E; ++I) {
      SDNode *Op = *I;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
  }

  return false;
}

bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified)
    DCI.CommitTargetLoweringOpt(TLO);
  return Simplified;
}

bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = DemandedEltMask;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");
  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= 6)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
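    // e.g. for v2i64 -> v4i32, demanding elt 3 of the v4i32 result demands
    // elt 1 of the v2i64 source.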
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
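    // e.g. for v4i32 -> v2i64, demanding elt 1 of the v2i64 result demands
    // elts 2 and 3 of the v4i32 source.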
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (!isa<ConstantSDNode>(Op.getOperand(2)))
      break;
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    const APInt &Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
    if (Idx.uge(NumElts - NumSubElts))
      break;
    unsigned SubIdx = Idx.getZExtValue();
    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;
    APInt BaseElts = DemandedElts;
    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, SubIdx);
    KnownZero.insertBits(SubZero, SubIdx);
    break;
  }
1521 case ISD::EXTRACT_SUBVECTOR: {
1522 SDValue Src = Op.getOperand(0);
1523 ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1524 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1525 if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
1526 // Offset the demanded elts by the subvector index.
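// E.g. extracting a v2i32 from a v8i32 at index 4 with DemandedElts = 0b01
// shifts to SrcElts = 0b00010000, i.e. only source element 4 is demanded.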
1527 uint64_t Idx = SubIdx->getZExtValue();
1528 APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1529 APInt SrcUndef, SrcZero;
1530 if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
1531 Depth + 1))
1532 return true;
1533 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
1534 KnownZero = SrcZero.extractBits(NumElts, Idx);
1536 break;
1538 case ISD::INSERT_VECTOR_ELT: {
1539 SDValue Vec = Op.getOperand(0);
1540 SDValue Scl = Op.getOperand(1);
1541 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1543 // For a legal, constant insertion index, if we don't need this insertion
1544 // then strip it, else remove it from the demanded elts.
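// E.g. (insert_vector_elt V, S, 2) with DemandedElts = 0b0001 doesn't demand
// lane 2, so the insertion folds away to V; with DemandedElts = 0b0100 we
// instead clear bit 2 before recursing, since V's lane 2 is overwritten by S.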
1545 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
1546 unsigned Idx = CIdx->getZExtValue();
1547 if (!DemandedElts[Idx])
1548 return TLO.CombineTo(Op, Vec);
1549 DemandedElts.clearBit(Idx);
1551 if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
1552 KnownZero, TLO, Depth + 1))
1553 return true;
1555 KnownUndef.clearBit(Idx);
1556 if (Scl.isUndef())
1557 KnownUndef.setBit(Idx);
1559 KnownZero.clearBit(Idx);
1560 if (isNullConstant(Scl) || isNullFPConstant(Scl))
1561 KnownZero.setBit(Idx);
1562 break;
1565 APInt VecUndef, VecZero;
1566 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
1567 Depth + 1))
1568 return true;
1569 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
1570 break;
1572 case ISD::VSELECT: {
1573 // Try to transform the select condition based on the current demanded
1574 // elements.
1575 // TODO: If a condition element is undef, we can choose from one arm of the
1576 // select (and if one arm is undef, then we can propagate that to the
1577 // result).
1578 // TODO - add support for constant vselect masks (see IR version of this).
1579 APInt UnusedUndef, UnusedZero;
1580 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
1581 UnusedZero, TLO, Depth + 1))
1582 return true;
1584 // See if we can simplify either vselect operand.
1585 APInt DemandedLHS(DemandedElts);
1586 APInt DemandedRHS(DemandedElts);
1587 APInt UndefLHS, ZeroLHS;
1588 APInt UndefRHS, ZeroRHS;
1589 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
1590 ZeroLHS, TLO, Depth + 1))
1591 return true;
1592 if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
1593 ZeroRHS, TLO, Depth + 1))
1594 return true;
1596 KnownUndef = UndefLHS & UndefRHS;
1597 KnownZero = ZeroLHS & ZeroRHS;
1598 break;
1600 case ISD::VECTOR_SHUFFLE: {
1601 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1603 // Collect demanded elements from shuffle operands.
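// E.g. for a v4i32 shuffle with mask <0, 5, 1, 4> and DemandedElts = 0b0011,
// lane 0 reads LHS element 0 and lane 1 reads RHS element 1, giving
// DemandedLHS = 0b0001 and DemandedRHS = 0b0010.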
1604 APInt DemandedLHS(NumElts, 0);
1605 APInt DemandedRHS(NumElts, 0);
1606 for (unsigned i = 0; i != NumElts; ++i) {
1607 int M = ShuffleMask[i];
1608 if (M < 0 || !DemandedElts[i])
1609 continue;
1610 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1611 if (M < (int)NumElts)
1612 DemandedLHS.setBit(M);
1613 else
1614 DemandedRHS.setBit(M - NumElts);
1617 // See if we can simplify either shuffle operand.
1618 APInt UndefLHS, ZeroLHS;
1619 APInt UndefRHS, ZeroRHS;
1620 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
1621 ZeroLHS, TLO, Depth + 1))
1622 return true;
1623 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
1624 ZeroRHS, TLO, Depth + 1))
1625 return true;
1627 // Simplify mask using undef elements from LHS/RHS.
1628 bool Updated = false;
1629 bool IdentityLHS = true, IdentityRHS = true;
1630 SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
1631 for (unsigned i = 0; i != NumElts; ++i) {
1632 int &M = NewMask[i];
1633 if (M < 0)
1634 continue;
1635 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
1636 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
1637 Updated = true;
1638 M = -1;
1640 IdentityLHS &= (M < 0) || (M == (int)i);
1641 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
1644 // Update legal shuffle masks based on demanded elements if it won't reduce
1645 // to an identity mask, which could cause premature removal of the shuffle.
1646 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
1647 isShuffleMaskLegal(NewMask, VT))
1648 return TLO.CombineTo(Op,
1649 TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
1650 Op.getOperand(1), NewMask));
1652 // Propagate undef/zero elements from LHS/RHS.
1653 for (unsigned i = 0; i != NumElts; ++i) {
1654 int M = ShuffleMask[i];
1655 if (M < 0) {
1656 KnownUndef.setBit(i);
1657 } else if (M < (int)NumElts) {
1658 if (UndefLHS[M])
1659 KnownUndef.setBit(i);
1660 if (ZeroLHS[M])
1661 KnownZero.setBit(i);
1662 } else {
1663 if (UndefRHS[M - NumElts])
1664 KnownUndef.setBit(i);
1665 if (ZeroRHS[M - NumElts])
1666 KnownZero.setBit(i);
1669 break;
1671 case ISD::ADD:
1672 case ISD::SUB: {
1673 APInt SrcUndef, SrcZero;
1674 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
1675 SrcZero, TLO, Depth + 1))
1676 return true;
1677 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1678 KnownZero, TLO, Depth + 1))
1679 return true;
1680 KnownZero &= SrcZero;
1681 KnownUndef &= SrcUndef;
1682 break;
1684 case ISD::TRUNCATE:
1685 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1686 KnownZero, TLO, Depth + 1))
1687 return true;
1688 break;
1689 default: {
1690 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
1691 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
1692 KnownZero, TLO, Depth))
1693 return true;
1694 break;
1698 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
1699 return false;
1702 /// Determine which of the bits specified in Mask are known to be either zero or
1703 /// one and return them in Known.
1704 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
1705 KnownBits &Known,
1706 const APInt &DemandedElts,
1707 const SelectionDAG &DAG,
1708 unsigned Depth) const {
1709 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1710 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1711 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1712 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1713 "Should use MaskedValueIsZero if you don't know whether Op"
1714 " is a target node!");
1715 Known.resetAll();
1718 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
1719 KnownBits &Known,
1720 const APInt &DemandedElts,
1721 const SelectionDAG &DAG,
1722 unsigned Depth) const {
1723 assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
1725 if (unsigned Align = DAG.InferPtrAlignment(Op)) {
1726 // The low bits are known zero if the pointer is aligned.
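// E.g. a frame index with 16-byte alignment has Log2_32(16) = 4 known-zero
// low bits, so Known.Zero covers bits 0-3 of the pointer.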
1727 Known.Zero.setLowBits(Log2_32(Align));
1731 /// This method can be implemented by targets that want to expose additional
1732 /// information about sign bits to the DAG Combiner.
1733 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
1734 const APInt &,
1735 const SelectionDAG &,
1736 unsigned Depth) const {
1737 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1738 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1739 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1740 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1741 "Should use ComputeNumSignBits if you don't know whether Op"
1742 " is a target node!");
1743 return 1;
1746 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
1747 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
1748 TargetLoweringOpt &TLO, unsigned Depth) const {
1749 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1753 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
1754 " is a target node!");
1755 return false;
1758 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
1759 const SelectionDAG &DAG,
1760 bool SNaN,
1761 unsigned Depth) const {
1762 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1763 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1764 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1765 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1766 "Should use isKnownNeverNaN if you don't know whether Op"
1767 " is a target node!");
1768 return false;
1771 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
1772 // work with truncating build vectors and vectors with elements of less than
1773 // 8 bits.
1774 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
1775 if (!N)
1776 return false;
1778 APInt CVal;
1779 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1780 CVal = CN->getAPIntValue();
1781 } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
1782 auto *CN = BV->getConstantSplatNode();
1783 if (!CN)
1784 return false;
1786 // If this is a truncating build vector, truncate the splat value.
1787 // Otherwise, we may fail to match the expected values below.
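// E.g. a v4i8 build_vector may carry i32 operands; a splat of -1 is then the
// 32-bit value 0xFFFFFFFF, and truncating it to the 8-bit element width
// yields 0xFF so the checks below can still match it as all-ones.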
1788 unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
1789 CVal = CN->getAPIntValue();
1790 if (BVEltWidth < CVal.getBitWidth())
1791 CVal = CVal.trunc(BVEltWidth);
1792 } else {
1793 return false;
1796 switch (getBooleanContents(N->getValueType(0))) {
1797 case UndefinedBooleanContent:
1798 return CVal[0];
1799 case ZeroOrOneBooleanContent:
1800 return CVal.isOneValue();
1801 case ZeroOrNegativeOneBooleanContent:
1802 return CVal.isAllOnesValue();
1805 llvm_unreachable("Invalid boolean contents");
1808 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
1809 if (!N)
1810 return false;
1812 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
1813 if (!CN) {
1814 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
1815 if (!BV)
1816 return false;
1818 // Only interested in constant splats; we don't care about undef
1819 // elements when identifying boolean constants, and getConstantSplatNode
1820 // returns null if all ops are undef.
1821 CN = BV->getConstantSplatNode();
1822 if (!CN)
1823 return false;
1826 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
1827 return !CN->getAPIntValue()[0];
1829 return CN->isNullValue();
1832 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
1833 bool SExt) const {
1834 if (VT == MVT::i1)
1835 return N->isOne();
1837 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
1838 switch (Cnt) {
1839 case TargetLowering::ZeroOrOneBooleanContent:
1840 // An extended value of 1 is always true, unless its original type is i1,
1841 // in which case it will be sign extended to -1.
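// E.g. with ZeroOrOneBooleanContent, (zext i1 true) is 1 (true), while
// (sext i1 true) is -1, which is why the i1 source type is excluded here.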
1842 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
1843 case TargetLowering::UndefinedBooleanContent:
1844 case TargetLowering::ZeroOrNegativeOneBooleanContent:
1845 return N->isAllOnesValue() && SExt;
1847 llvm_unreachable("Unexpected enumeration.");
1850 /// This helper function of SimplifySetCC tries to optimize the comparison when
1851 /// either operand of the SetCC node is a bitwise-and instruction.
1852 SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
1853 ISD::CondCode Cond,
1854 DAGCombinerInfo &DCI,
1855 const SDLoc &DL) const {
1856 // Match these patterns in any of their permutations:
1857 // (X & Y) == Y
1858 // (X & Y) != Y
1859 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
1860 std::swap(N0, N1);
1862 EVT OpVT = N0.getValueType();
1863 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
1864 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
1865 return SDValue();
1867 SDValue X, Y;
1868 if (N0.getOperand(0) == N1) {
1869 X = N0.getOperand(1);
1870 Y = N0.getOperand(0);
1871 } else if (N0.getOperand(1) == N1) {
1872 X = N0.getOperand(0);
1873 Y = N0.getOperand(1);
1874 } else {
1875 return SDValue();
1878 SelectionDAG &DAG = DCI.DAG;
1879 SDValue Zero = DAG.getConstant(0, DL, OpVT);
1880 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
1881 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
1882 // Note that where Y is variable and is known to have at most one bit set
1883 // (for example, if it is Z & 1) we cannot do this; the expressions are not
1884 // equivalent when Y == 0.
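// E.g. with Y == 4: (X & 4) == 4 and (X & 4) != 0 agree for all X. But if Y
// were only known to have at most one set bit (say Y == Z & 1), then for
// Y == 0 we'd have (X & 0) == 0 true while (X & 0) != 0 is false.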
1885 Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
1886 if (DCI.isBeforeLegalizeOps() ||
1887 isCondCodeLegal(Cond, N0.getSimpleValueType()))
1888 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
1889 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
1890 // If the target supports an 'and-not' or 'and-complement' logic operation,
1891 // try to use that to make a comparison operation more efficient.
1892 // But don't do this transform if the mask is a single bit because there are
1893 // more efficient ways to deal with that case (for example, 'bt' on x86 or
1894 // 'rlwinm' on PPC).
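// E.g. on a target with a BMI-style ANDN instruction, (X & Y) == Y becomes
// (~X & Y) == 0, a single and-not feeding the compare (a sketch; the exact
// selection is target-specific).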
1896 // Bail out if the compare operand that we want to turn into a zero is
1897 // already a zero (otherwise, infinite loop).
1898 auto *YConst = dyn_cast<ConstantSDNode>(Y);
1899 if (YConst && YConst->isNullValue())
1900 return SDValue();
1902 // Transform this into: ~X & Y == 0.
1903 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
1904 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
1905 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
1908 return SDValue();
1911 /// There are multiple IR patterns that could be checking whether a certain
1912 /// truncation of a signed number would be lossy or not. The pattern that is
1913 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
1914 /// We are looking for the following pattern: (KeptBits is a constant)
1915 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
1916 /// KeptBits won't be bitwidth(x); that case is constant-folded to true/false.
1917 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
1918 /// We will unfold it into the natural trunc+sext pattern:
1919 /// ((%x << C) a>> C) dstcond %x
1920 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
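/// For example (i16, KeptBits = 8): (add %x, 128) u< 256 holds exactly when
/// %x is in [-128, 127], which is ((%x << 8) a>> 8) == %x with C = 8.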
1921 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
1922 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
1923 const SDLoc &DL) const {
1924 // We must be comparing with a constant.
1925 ConstantSDNode *C1;
1926 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
1927 return SDValue();
1929 // N0 should be: add %x, (1 << (KeptBits-1))
1930 if (N0->getOpcode() != ISD::ADD)
1931 return SDValue();
1933 // And we must be 'add'ing a constant.
1934 ConstantSDNode *C01;
1935 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
1936 return SDValue();
1938 SDValue X = N0->getOperand(0);
1939 EVT XVT = X.getValueType();
1941 // Validate constants ...
1943 APInt I1 = C1->getAPIntValue();
1945 ISD::CondCode NewCond;
1946 if (Cond == ISD::CondCode::SETULT) {
1947 NewCond = ISD::CondCode::SETEQ;
1948 } else if (Cond == ISD::CondCode::SETULE) {
1949 NewCond = ISD::CondCode::SETEQ;
1950 // But need to 'canonicalize' the constant.
1951 I1 += 1;
1952 } else if (Cond == ISD::CondCode::SETUGT) {
1953 NewCond = ISD::CondCode::SETNE;
1954 // But need to 'canonicalize' the constant.
1955 I1 += 1;
1956 } else if (Cond == ISD::CondCode::SETUGE) {
1957 NewCond = ISD::CondCode::SETNE;
1958 } else
1959 return SDValue();
1961 APInt I01 = C01->getAPIntValue();
1963 auto checkConstants = [&I1, &I01]() -> bool {
1964 // Both of them must be power-of-two, and the constant from setcc is bigger.
1965 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
1968 if (checkConstants()) {
1969 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
1970 } else {
1971 // What if we invert constants? (and the target predicate)
1972 I1.negate();
1973 I01.negate();
1974 NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
1975 if (!checkConstants())
1976 return SDValue();
1977 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
1980 // They are power-of-two, so which bit is set?
1981 const unsigned KeptBits = I1.logBase2();
1982 const unsigned KeptBitsMinusOne = I01.logBase2();
1984 // Magic!
1985 if (KeptBits != (KeptBitsMinusOne + 1))
1986 return SDValue();
1987 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
1989 // We don't want to do this in every single case.
1990 SelectionDAG &DAG = DCI.DAG;
1991 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
1992 XVT, KeptBits))
1993 return SDValue();
1995 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
1996 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
1998 // Unfold into: ((%x << C) a>> C) cond %x
1999 // Where 'cond' will be either 'eq' or 'ne'.
2000 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2001 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2002 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2003 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2005 return T2;
2008 /// Try to simplify a setcc built with the specified operands and cc. If it is
2009 /// unable to simplify it, return a null SDValue.
2010 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
2011 ISD::CondCode Cond, bool foldBooleans,
2012 DAGCombinerInfo &DCI,
2013 const SDLoc &dl) const {
2014 SelectionDAG &DAG = DCI.DAG;
2015 EVT OpVT = N0.getValueType();
2017 // These setcc operations always fold.
2018 switch (Cond) {
2019 default: break;
2020 case ISD::SETFALSE:
2021 case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
2022 case ISD::SETTRUE:
2023 case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
2026 // Ensure that the constant occurs on the RHS and fold constant comparisons.
2027 // TODO: Handle non-splat vector constants. All undef causes trouble.
2028 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
2029 if (isConstOrConstSplat(N0) &&
2030 (DCI.isBeforeLegalizeOps() ||
2031 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
2032 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
2034 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2035 const APInt &C1 = N1C->getAPIntValue();
2037 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
2038 // equality comparison, then we're just comparing whether X itself is
2039 // zero.
2040 if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
2041 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
2042 N0.getOperand(1).getOpcode() == ISD::Constant) {
2043 const APInt &ShAmt
2044 = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2045 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2046 ShAmt == Log2_32(N0.getValueSizeInBits())) {
2047 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
2048 // (srl (ctlz x), 5) == 0 -> X != 0
2049 // (srl (ctlz x), 5) != 1 -> X != 0
2050 Cond = ISD::SETNE;
2051 } else {
2052 // (srl (ctlz x), 5) != 0 -> X == 0
2053 // (srl (ctlz x), 5) == 1 -> X == 0
2054 Cond = ISD::SETEQ;
2056 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
2057 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
2058 Zero, Cond);
2062 SDValue CTPOP = N0;
2063 // Look through truncs that don't change the value of a ctpop.
2064 if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
2065 CTPOP = N0.getOperand(0);
2067 if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
2068 (N0 == CTPOP ||
2069 N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
2070 EVT CTVT = CTPOP.getValueType();
2071 SDValue CTOp = CTPOP.getOperand(0);
2073 // (ctpop x) u< 2 -> (x & x-1) == 0
2074 // (ctpop x) u> 1 -> (x & x-1) != 0
2075 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)) {
2076 SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
2077 DAG.getConstant(1, dl, CTVT));
2078 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
2079 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
2080 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
2083 // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
2086 // (zext x) == C --> x == (trunc C)
2087 // (sext x) == C --> x == (trunc C)
2088 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2089 DCI.isBeforeLegalize() && N0->hasOneUse()) {
2090 unsigned MinBits = N0.getValueSizeInBits();
2091 SDValue PreExt;
2092 bool Signed = false;
2093 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
2094 // ZExt
2095 MinBits = N0->getOperand(0).getValueSizeInBits();
2096 PreExt = N0->getOperand(0);
2097 } else if (N0->getOpcode() == ISD::AND) {
2098 // DAGCombine turns costly ZExts into ANDs
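// E.g. (and X, 255) behaves like (zext i8 (trunc X)); the mask 255 = 2^8 - 1
// gives MinBits = 8 via countTrailingOnes().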
2099 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
2100 if ((C->getAPIntValue()+1).isPowerOf2()) {
2101 MinBits = C->getAPIntValue().countTrailingOnes();
2102 PreExt = N0->getOperand(0);
2104 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
2105 // SExt
2106 MinBits = N0->getOperand(0).getValueSizeInBits();
2107 PreExt = N0->getOperand(0);
2108 Signed = true;
2109 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
2110 // ZEXTLOAD / SEXTLOAD
2111 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
2112 MinBits = LN0->getMemoryVT().getSizeInBits();
2113 PreExt = N0;
2114 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
2115 Signed = true;
2116 MinBits = LN0->getMemoryVT().getSizeInBits();
2117 PreExt = N0;
2121 // Figure out how many bits we need to preserve this constant.
2122 unsigned ReqdBits = Signed ?
2123 C1.getBitWidth() - C1.getNumSignBits() + 1 :
2124 C1.getActiveBits();
2126 // Make sure we're not losing bits from the constant.
2127 if (MinBits > 0 &&
2128 MinBits < C1.getBitWidth() &&
2129 MinBits >= ReqdBits) {
2130 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
2131 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
2132 // Will get folded away.
2133 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
2134 if (MinBits == 1 && C1 == 1)
2135 // Invert the condition.
2136 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
2137 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2138 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
2139 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
2142 // If truncating the setcc operands is not desirable, we can still
2143 // simplify the expression in some cases:
2144 // (setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
2145 // (setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
2146 // (setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
2147 // (setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
2148 // (setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
2149 // (setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
2150 SDValue TopSetCC = N0->getOperand(0);
2151 unsigned N0Opc = N0->getOpcode();
2152 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
2153 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
2154 TopSetCC.getOpcode() == ISD::SETCC &&
2155 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
2156 (isConstFalseVal(N1C) ||
2157 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
2159 bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
2160 (!N1C->isNullValue() && Cond == ISD::SETNE);
2162 if (!Inverse)
2163 return TopSetCC;
2165 ISD::CondCode InvCond = ISD::getSetCCInverse(
2166 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
2167 TopSetCC.getOperand(0).getValueType().isInteger());
2168 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
2169 TopSetCC.getOperand(1),
2170 InvCond);
2175 // If the LHS is '(and load, const)', the RHS is 0, the test is for
2176 // equality or unsigned, and all 1 bits of the const are in the same
2177 // partial word, see if we can shorten the load.
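// E.g. for ((load i32 %p) & 0xFF00) == 0 on a little-endian target, the set
// bits live entirely in byte 1, so the search below narrows this to an i8
// load at offset 1, masked with 0xFF and compared against zero (a sketch;
// the loop prefers the narrowest width that still covers the mask).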
2178 if (DCI.isBeforeLegalize() &&
2179 !ISD::isSignedIntSetCC(Cond) &&
2180 N0.getOpcode() == ISD::AND && C1 == 0 &&
2181 N0.getNode()->hasOneUse() &&
2182 isa<LoadSDNode>(N0.getOperand(0)) &&
2183 N0.getOperand(0).getNode()->hasOneUse() &&
2184 isa<ConstantSDNode>(N0.getOperand(1))) {
2185 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
2186 APInt bestMask;
2187 unsigned bestWidth = 0, bestOffset = 0;
2188 if (!Lod->isVolatile() && Lod->isUnindexed()) {
2189 unsigned origWidth = N0.getValueSizeInBits();
2190 unsigned maskWidth = origWidth;
2191 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
2192 // 8 bits, but have to be careful...
2193 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
2194 origWidth = Lod->getMemoryVT().getSizeInBits();
2195 const APInt &Mask =
2196 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2197 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
2198 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
2199 for (unsigned offset=0; offset<origWidth/width; offset++) {
2200 if (Mask.isSubsetOf(newMask)) {
2201 if (DAG.getDataLayout().isLittleEndian())
2202 bestOffset = (uint64_t)offset * (width/8);
2203 else
2204 bestOffset = (origWidth/width - offset - 1) * (width/8);
2205 bestMask = Mask.lshr(offset * (width/8) * 8);
2206 bestWidth = width;
2207 break;
2209 newMask <<= width;
2213 if (bestWidth) {
2214 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
2215 if (newVT.isRound()) {
2216 EVT PtrType = Lod->getOperand(1).getValueType();
2217 SDValue Ptr = Lod->getBasePtr();
2218 if (bestOffset != 0)
2219 Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
2220 DAG.getConstant(bestOffset, dl, PtrType));
2221 unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
2222 SDValue NewLoad = DAG.getLoad(
2223 newVT, dl, Lod->getChain(), Ptr,
2224 Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
2225 return DAG.getSetCC(dl, VT,
2226 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
2227 DAG.getConstant(bestMask.trunc(bestWidth),
2228 dl, newVT)),
2229 DAG.getConstant(0LL, dl, newVT), Cond);
2234 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
2235 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
2236 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
2238 // If the comparison constant has bits in the upper part, the
2239 // zero-extended value could never match.
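// E.g. (zext i8 X) can only produce values 0-255, so comparing it for
// equality against 256 folds to false, and setne folds to true.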
2240 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
2241 C1.getBitWidth() - InSize))) {
2242 switch (Cond) {
2243 case ISD::SETUGT:
2244 case ISD::SETUGE:
2245 case ISD::SETEQ:
2246 return DAG.getConstant(0, dl, VT);
2247 case ISD::SETULT:
2248 case ISD::SETULE:
2249 case ISD::SETNE:
2250 return DAG.getConstant(1, dl, VT);
2251 case ISD::SETGT:
2252 case ISD::SETGE:
2253 // True if the sign bit of C1 is set.
2254 return DAG.getConstant(C1.isNegative(), dl, VT);
2255 case ISD::SETLT:
2256 case ISD::SETLE:
2257 // True if the sign bit of C1 isn't set.
2258 return DAG.getConstant(C1.isNonNegative(), dl, VT);
2259 default:
2260 break;
2264 // Otherwise, we can perform the comparison with the low bits.
2265 switch (Cond) {
2266 case ISD::SETEQ:
2267 case ISD::SETNE:
2268 case ISD::SETUGT:
2269 case ISD::SETUGE:
2270 case ISD::SETULT:
2271 case ISD::SETULE: {
2272 EVT newVT = N0.getOperand(0).getValueType();
2273 if (DCI.isBeforeLegalizeOps() ||
2274 (isOperationLegal(ISD::SETCC, newVT) &&
2275 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
2276 EVT NewSetCCVT =
2277 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
2278 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
2280 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
2281 NewConst, Cond);
2282 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
2284 break;
2286 default:
2287 break; // TODO: be more careful with signed comparisons
2289 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2290 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2291 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
2292 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
2293 EVT ExtDstTy = N0.getValueType();
2294 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
2296 // If the constant doesn't fit into the number of bits for the source of
2297 // the sign extension, it is impossible for both sides to be equal.
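// E.g. (sext_inreg X, i8) always yields a value in [-128, 127], so an
// equality compare against 300 is known false (setne: known true).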
2298 if (C1.getMinSignedBits() > ExtSrcTyBits)
2299 return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
2301 SDValue ZextOp;
2302 EVT Op0Ty = N0.getOperand(0).getValueType();
2303 if (Op0Ty == ExtSrcTy) {
2304 ZextOp = N0.getOperand(0);
2305 } else {
2306 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
2307 ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
2308 DAG.getConstant(Imm, dl, Op0Ty));
2310 if (!DCI.isCalledByLegalizer())
2311 DCI.AddToWorklist(ZextOp.getNode());
2312 // Otherwise, make this a use of a zext.
2313 return DAG.getSetCC(dl, VT, ZextOp,
2314 DAG.getConstant(C1 & APInt::getLowBitsSet(
2315 ExtDstTyBits,
2316 ExtSrcTyBits),
2317 dl, ExtDstTy),
2318 Cond);
2319 } else if ((N1C->isNullValue() || N1C->isOne()) &&
2320 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2321 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
2322 if (N0.getOpcode() == ISD::SETCC &&
2323 isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
2324 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
2325 if (TrueWhenTrue)
2326 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
2327 // Invert the condition.
2328 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
2329 CC = ISD::getSetCCInverse(CC,
2330 N0.getOperand(0).getValueType().isInteger());
2331 if (DCI.isBeforeLegalizeOps() ||
2332 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
2333 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
2336 if ((N0.getOpcode() == ISD::XOR ||
2337 (N0.getOpcode() == ISD::AND &&
2338 N0.getOperand(0).getOpcode() == ISD::XOR &&
2339 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
2340 isa<ConstantSDNode>(N0.getOperand(1)) &&
2341 cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
2342 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
2343 // can only do this if the top bits are known zero.
2344 unsigned BitWidth = N0.getValueSizeInBits();
2345 if (DAG.MaskedValueIsZero(N0,
2346 APInt::getHighBitsSet(BitWidth,
2347 BitWidth-1))) {
2348 // Okay, get the un-inverted input value.
2349 SDValue Val;
2350 if (N0.getOpcode() == ISD::XOR) {
2351 Val = N0.getOperand(0);
2352 } else {
2353 assert(N0.getOpcode() == ISD::AND &&
2354 N0.getOperand(0).getOpcode() == ISD::XOR);
2355 // ((X^1)&1)^1 -> X & 1
2356 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
2357 N0.getOperand(0).getOperand(0),
2358 N0.getOperand(1));
2361 return DAG.getSetCC(dl, VT, Val, N1,
2362 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2364 } else if (N1C->isOne() &&
2365 (VT == MVT::i1 ||
2366 getBooleanContents(N0->getValueType(0)) ==
2367 ZeroOrOneBooleanContent)) {
2368 SDValue Op0 = N0;
2369 if (Op0.getOpcode() == ISD::TRUNCATE)
2370 Op0 = Op0.getOperand(0);
2372 if ((Op0.getOpcode() == ISD::XOR) &&
2373 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
2374 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
2375 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
2376 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
2377 return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
2378 Cond);
2380 if (Op0.getOpcode() == ISD::AND &&
2381 isa<ConstantSDNode>(Op0.getOperand(1)) &&
2382 cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
2383 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
2384 if (Op0.getValueType().bitsGT(VT))
2385 Op0 = DAG.getNode(ISD::AND, dl, VT,
2386 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
2387 DAG.getConstant(1, dl, VT));
2388 else if (Op0.getValueType().bitsLT(VT))
2389 Op0 = DAG.getNode(ISD::AND, dl, VT,
2390 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
2391 DAG.getConstant(1, dl, VT));
2393 return DAG.getSetCC(dl, VT, Op0,
2394 DAG.getConstant(0, dl, Op0.getValueType()),
2395 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2397 if (Op0.getOpcode() == ISD::AssertZext &&
2398 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
2399 return DAG.getSetCC(dl, VT, Op0,
2400 DAG.getConstant(0, dl, Op0.getValueType()),
2401 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2405 if (SDValue V =
2406 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
2407 return V;
2410 // These simplifications apply to splat vectors as well.
2411 // TODO: Handle more splat vector cases.
2412 if (auto *N1C = isConstOrConstSplat(N1)) {
2413 const APInt &C1 = N1C->getAPIntValue();
2415 APInt MinVal, MaxVal;
2416 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
2417 if (ISD::isSignedIntSetCC(Cond)) {
2418 MinVal = APInt::getSignedMinValue(OperandBitSize);
2419 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
2420 } else {
2421 MinVal = APInt::getMinValue(OperandBitSize);
2422 MaxVal = APInt::getMaxValue(OperandBitSize);
2425 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
2426 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
2427 // X >= MIN --> true
2428 if (C1 == MinVal)
2429 return DAG.getBoolConstant(true, dl, VT, OpVT);
2431 if (!VT.isVector()) { // TODO: Support this for vectors.
2432 // X >= C0 --> X > (C0 - 1)
2433 APInt C = C1 - 1;
2434 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
2435 if ((DCI.isBeforeLegalizeOps() ||
2436 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2437 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2438 isLegalICmpImmediate(C.getSExtValue())))) {
2439 return DAG.getSetCC(dl, VT, N0,
2440 DAG.getConstant(C, dl, N1.getValueType()),
2441 NewCC);
2446 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
2447 // X <= MAX --> true
2448 if (C1 == MaxVal)
2449 return DAG.getBoolConstant(true, dl, VT, OpVT);
2451 // X <= C0 --> X < (C0 + 1)
2452 if (!VT.isVector()) { // TODO: Support this for vectors.
2453 APInt C = C1 + 1;
2454 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
2455 if ((DCI.isBeforeLegalizeOps() ||
2456 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2457 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2458 isLegalICmpImmediate(C.getSExtValue())))) {
2459 return DAG.getSetCC(dl, VT, N0,
2460 DAG.getConstant(C, dl, N1.getValueType()),
2461 NewCC);
2466 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
2467 if (C1 == MinVal)
2468 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
2470 // TODO: Support this for vectors after legalize ops.
2471 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2472 // Canonicalize setlt X, Max --> setne X, Max
2473 if (C1 == MaxVal)
2474 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2476 // If we have setult X, 1, turn it into seteq X, 0
2477 if (C1 == MinVal+1)
2478 return DAG.getSetCC(dl, VT, N0,
2479 DAG.getConstant(MinVal, dl, N0.getValueType()),
2480 ISD::SETEQ);
2484 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
2485 if (C1 == MaxVal)
2486 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
2488 // TODO: Support this for vectors after legalize ops.
2489 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2490 // Canonicalize setgt X, Min --> setne X, Min
2491 if (C1 == MinVal)
2492 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2494 // If we have setugt X, Max-1, turn it into seteq X, Max
2495 if (C1 == MaxVal-1)
2496 return DAG.getSetCC(dl, VT, N0,
2497 DAG.getConstant(MaxVal, dl, N0.getValueType()),
2498 ISD::SETEQ);
2502 // If we have "setcc X, C0", check to see if we can shrink the immediate
2503 // by changing cc.
2504 // TODO: Support this for vectors after legalize ops.
2505 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2506 // SETUGT X, SINTMAX -> SETLT X, 0
2507 if (Cond == ISD::SETUGT &&
2508 C1 == APInt::getSignedMaxValue(OperandBitSize))
2509 return DAG.getSetCC(dl, VT, N0,
2510 DAG.getConstant(0, dl, N1.getValueType()),
2511 ISD::SETLT);
2513 // SETULT X, SINTMIN -> SETGT X, -1
2514 if (Cond == ISD::SETULT &&
2515 C1 == APInt::getSignedMinValue(OperandBitSize)) {
2516 SDValue ConstMinusOne =
2517 DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
2518 N1.getValueType());
2519 return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
2524 // Back to non-vector simplifications.
2525 // TODO: Can we do these for vector splats?
2526 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2527 const APInt &C1 = N1C->getAPIntValue();
2529 // Fold bit comparisons when we can.
2530 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2531 (VT == N0.getValueType() ||
2532 (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
2533 N0.getOpcode() == ISD::AND) {
2534 auto &DL = DAG.getDataLayout();
2535 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2536 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2537 !DCI.isBeforeLegalize());
2538 if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
2539 // Perform the xform if the AND RHS is a single bit.
2540 if (AndRHS->getAPIntValue().isPowerOf2()) {
2541 return DAG.getNode(ISD::TRUNCATE, dl, VT,
2542 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2543 DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
2544 ShiftTy)));
2546 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
2547 // (X & 8) == 8 --> (X & 8) >> 3
2548 // Perform the xform if C1 is a single bit.
2549 if (C1.isPowerOf2()) {
2550 return DAG.getNode(ISD::TRUNCATE, dl, VT,
2551 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2552 DAG.getConstant(C1.logBase2(), dl,
2553 ShiftTy)));
2559 if (C1.getMinSignedBits() <= 64 &&
2560 !isLegalICmpImmediate(C1.getSExtValue())) {
2561 // (X & -256) == 256 -> (X >> 8) == 1
2562 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2563 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
2564 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2565 const APInt &AndRHSC = AndRHS->getAPIntValue();
2566 if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
2567 unsigned ShiftBits = AndRHSC.countTrailingZeros();
2568 auto &DL = DAG.getDataLayout();
2569 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2570 !DCI.isBeforeLegalize());
2571 EVT CmpTy = N0.getValueType();
2572 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
2573 DAG.getConstant(ShiftBits, dl,
2574 ShiftTy));
2575 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
2576 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
2579 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
2580 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
2581 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
2582 // X < 0x100000000 -> (X >> 32) < 1
2583 // X >= 0x100000000 -> (X >> 32) >= 1
2584 // X <= 0x0ffffffff -> (X >> 32) < 1
2585 // X > 0x0ffffffff -> (X >> 32) >= 1
2586 unsigned ShiftBits;
2587 APInt NewC = C1;
2588 ISD::CondCode NewCond = Cond;
2589 if (AdjOne) {
2590 ShiftBits = C1.countTrailingOnes();
2591 NewC = NewC + 1;
2592 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2593 } else {
2594 ShiftBits = C1.countTrailingZeros();
2596 NewC.lshrInPlace(ShiftBits);
2597 if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
2598 isLegalICmpImmediate(NewC.getSExtValue())) {
2599 auto &DL = DAG.getDataLayout();
2600 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2601 !DCI.isBeforeLegalize());
2602 EVT CmpTy = N0.getValueType();
2603 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
2604 DAG.getConstant(ShiftBits, dl, ShiftTy));
2605 SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
2606 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
2612 if (isa<ConstantFPSDNode>(N0.getNode())) {
2613 // Constant fold or commute setcc.
2614 SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
2615 if (O.getNode()) return O;
2616 } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
2617 // If the RHS of an FP comparison is a constant, simplify it away in
2618 // some cases.
2619 if (CFP->getValueAPF().isNaN()) {
2620 // If an operand is known to be a nan, we can fold it.
2621 switch (ISD::getUnorderedFlavor(Cond)) {
2622 default: llvm_unreachable("Unknown flavor!");
2623 case 0: // Known false.
2624 return DAG.getBoolConstant(false, dl, VT, OpVT);
2625 case 1: // Known true.
2626 return DAG.getBoolConstant(true, dl, VT, OpVT);
2627 case 2: // Undefined.
2628 return DAG.getUNDEF(VT);
2632 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
2633 // constant if knowing that the operand is non-nan is enough. We prefer to
2634 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
2635 // materialize 0.0.
2636 if (Cond == ISD::SETO || Cond == ISD::SETUO)
2637 return DAG.getSetCC(dl, VT, N0, N0, Cond);
2639 // setcc (fneg x), C -> setcc swap(pred) x, -C
2640 if (N0.getOpcode() == ISD::FNEG) {
2641 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
2642 if (DCI.isBeforeLegalizeOps() ||
2643 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
2644 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
2645 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
2649 // If the condition is not legal, see if we can find an equivalent one
2650 // which is legal.
2651 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
2652 // If the comparison was an awkward floating-point == or != and one of
2653 // the comparison operands is infinity or negative infinity, convert the
2654 // condition to a less-awkward <= or >=.
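// E.g. x == -inf (SETOEQ) holds exactly when x <= -inf does, since nothing
// ordered compares below -inf, so SETOLE can stand in when it is the legal
// predicate.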
2655 if (CFP->getValueAPF().isInfinity()) {
2656 if (CFP->getValueAPF().isNegative()) {
2657 if (Cond == ISD::SETOEQ &&
2658 isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
2659 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
2660 if (Cond == ISD::SETUEQ &&
2661 isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
2662 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
2663 if (Cond == ISD::SETUNE &&
2664 isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
2665 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
2666 if (Cond == ISD::SETONE &&
2667 isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
2668 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
2669 } else {
2670 if (Cond == ISD::SETOEQ &&
2671 isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
2672 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
2673 if (Cond == ISD::SETUEQ &&
2674 isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
2675 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
2676 if (Cond == ISD::SETUNE &&
2677 isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
2678 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
2679 if (Cond == ISD::SETONE &&
2680 isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
2681 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
2687 if (N0 == N1) {
2688 // The sext(setcc()) => setcc() optimization relies on the appropriate
2689 // constant being emitted.
2691 bool EqTrue = ISD::isTrueWhenEqual(Cond);
2693 // We can always fold X == X for integer setcc's.
2694 if (N0.getValueType().isInteger())
2695 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2697 unsigned UOF = ISD::getUnorderedFlavor(Cond);
2698 if (UOF == 2) // FP operators that are undefined on NaNs.
2699 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2700 if (UOF == unsigned(EqTrue))
2701 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2702 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
2703 // if it is not already.
2704 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
2705 if (NewCond != Cond &&
2706 (DCI.isBeforeLegalizeOps() ||
2707 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
2708 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
2711 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2712 N0.getValueType().isInteger()) {
2713 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
2714 N0.getOpcode() == ISD::XOR) {
2715 // Simplify (X+Y) == (X+Z) --> Y == Z
2716 if (N0.getOpcode() == N1.getOpcode()) {
2717 if (N0.getOperand(0) == N1.getOperand(0))
2718 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
2719 if (N0.getOperand(1) == N1.getOperand(1))
2720 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
2721 if (isCommutativeBinOp(N0.getOpcode())) {
2722 // If X op Y == Y op X, try other combinations.
2723 if (N0.getOperand(0) == N1.getOperand(1))
2724 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
2725 Cond);
2726 if (N0.getOperand(1) == N1.getOperand(0))
2727 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
2728 Cond);
2732 // If RHS is a legal immediate value for a compare instruction, we need
2733 // to be careful about increasing register pressure needlessly.
2734 bool LegalRHSImm = false;
2736 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
2737 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2738 // Turn (X+C1) == C2 --> X == C2-C1
2739 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
2740 return DAG.getSetCC(dl, VT, N0.getOperand(0),
2741 DAG.getConstant(RHSC->getAPIntValue()-
2742 LHSR->getAPIntValue(),
2743 dl, N0.getValueType()), Cond);
2746 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
2747 if (N0.getOpcode() == ISD::XOR)
2748 // If we know that all of the inverted bits are zero, don't bother
2749 // performing the inversion.
2750 if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
2751 return
2752 DAG.getSetCC(dl, VT, N0.getOperand(0),
2753 DAG.getConstant(LHSR->getAPIntValue() ^
2754 RHSC->getAPIntValue(),
2755 dl, N0.getValueType()),
2756 Cond);
2759 // Turn (C1-X) == C2 --> X == C1-C2
2760 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
2761 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
2762 return
2763 DAG.getSetCC(dl, VT, N0.getOperand(1),
2764 DAG.getConstant(SUBC->getAPIntValue() -
2765 RHSC->getAPIntValue(),
2766 dl, N0.getValueType()),
2767 Cond);
2771 // Could RHSC fold directly into a compare?
2772 if (RHSC->getValueType(0).getSizeInBits() <= 64)
2773 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
2776 // Simplify (X+Z) == X --> Z == 0
2777 // Don't do this if X is an immediate that can fold into a cmp
2778 // instruction and X+Z has other uses. It could be an induction variable
2779 // chain, and the transform would increase register pressure.
2780 if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
2781 if (N0.getOperand(0) == N1)
2782 return DAG.getSetCC(dl, VT, N0.getOperand(1),
2783 DAG.getConstant(0, dl, N0.getValueType()), Cond);
2784 if (N0.getOperand(1) == N1) {
2785 if (isCommutativeBinOp(N0.getOpcode()))
2786 return DAG.getSetCC(dl, VT, N0.getOperand(0),
2787 DAG.getConstant(0, dl, N0.getValueType()),
2788 Cond);
2789 if (N0.getNode()->hasOneUse()) {
2790 assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
2791 auto &DL = DAG.getDataLayout();
2792 // (Z-X) == X --> Z == X<<1
2793 SDValue SH = DAG.getNode(
2794 ISD::SHL, dl, N1.getValueType(), N1,
2795 DAG.getConstant(1, dl,
2796 getShiftAmountTy(N1.getValueType(), DL,
2797 !DCI.isBeforeLegalize())));
2798 if (!DCI.isCalledByLegalizer())
2799 DCI.AddToWorklist(SH.getNode());
2800 return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
2806 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
2807 N1.getOpcode() == ISD::XOR) {
2808 // Simplify X == (X+Z) --> Z == 0
2809 if (N1.getOperand(0) == N0)
2810 return DAG.getSetCC(dl, VT, N1.getOperand(1),
2811 DAG.getConstant(0, dl, N1.getValueType()), Cond);
2812 if (N1.getOperand(1) == N0) {
2813 if (isCommutativeBinOp(N1.getOpcode()))
2814 return DAG.getSetCC(dl, VT, N1.getOperand(0),
2815 DAG.getConstant(0, dl, N1.getValueType()), Cond);
2816 if (N1.getNode()->hasOneUse()) {
2817 assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
2818 auto &DL = DAG.getDataLayout();
2819 // X == (Z-X) --> X<<1 == Z
2820 SDValue SH = DAG.getNode(
2821 ISD::SHL, dl, N1.getValueType(), N0,
2822 DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
2823 !DCI.isBeforeLegalize())));
2824 if (!DCI.isCalledByLegalizer())
2825 DCI.AddToWorklist(SH.getNode());
2826 return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
2831 if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
2832 return V;
2835 // Fold away ALL boolean setcc's.
2836 SDValue Temp;
2837 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
2838 EVT OpVT = N0.getValueType();
2839 switch (Cond) {
2840 default: llvm_unreachable("Unknown integer setcc!");
2841 case ISD::SETEQ: // X == Y -> ~(X^Y)
2842 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2843 N0 = DAG.getNOT(dl, Temp, OpVT);
2844 if (!DCI.isCalledByLegalizer())
2845 DCI.AddToWorklist(Temp.getNode());
2846 break;
2847 case ISD::SETNE: // X != Y --> (X^Y)
2848 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2849 break;
2850 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
2851 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
2852 Temp = DAG.getNOT(dl, N0, OpVT);
2853 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
2854 if (!DCI.isCalledByLegalizer())
2855 DCI.AddToWorklist(Temp.getNode());
2856 break;
2857 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
2858 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
2859 Temp = DAG.getNOT(dl, N1, OpVT);
2860 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
2861 if (!DCI.isCalledByLegalizer())
2862 DCI.AddToWorklist(Temp.getNode());
2863 break;
2864 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
2865 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
2866 Temp = DAG.getNOT(dl, N0, OpVT);
2867 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
2868 if (!DCI.isCalledByLegalizer())
2869 DCI.AddToWorklist(Temp.getNode());
2870 break;
2871 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
2872 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
2873 Temp = DAG.getNOT(dl, N1, OpVT);
2874 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
2875 break;
2877 if (VT.getScalarType() != MVT::i1) {
2878 if (!DCI.isCalledByLegalizer())
2879 DCI.AddToWorklist(N0.getNode());
2880 // FIXME: If running after legalize, we probably can't do this.
2881 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
2882 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
2884 return N0;
2887 // Could not fold it.
2888 return SDValue();
2891 /// Returns true (and the GlobalValue and the offset) if the node is a
2892 /// GlobalAddress + offset.
2893 bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
2894 int64_t &Offset) const {
2895 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
2896 GA = GASD->getGlobal();
2897 Offset += GASD->getOffset();
2898 return true;
2901 if (N->getOpcode() == ISD::ADD) {
2902 SDValue N1 = N->getOperand(0);
2903 SDValue N2 = N->getOperand(1);
2904 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
2905 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
2906 Offset += V->getSExtValue();
2907 return true;
2909 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
2910 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
2911 Offset += V->getSExtValue();
2912 return true;
2917 return false;
2920 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
2921 DAGCombinerInfo &DCI) const {
2922 // Default implementation: no optimization.
2923 return SDValue();
2926 //===----------------------------------------------------------------------===//
2927 // Inline Assembler Implementation Methods
2928 //===----------------------------------------------------------------------===//
2930 TargetLowering::ConstraintType
2931 TargetLowering::getConstraintType(StringRef Constraint) const {
2932 unsigned S = Constraint.size();
2934 if (S == 1) {
2935 switch (Constraint[0]) {
2936 default: break;
2937 case 'r': return C_RegisterClass;
2938 case 'm': // memory
2939 case 'o': // offsetable
2940 case 'V': // not offsetable
2941 return C_Memory;
2942 case 'i': // Simple Integer or Relocatable Constant
2943 case 'n': // Simple Integer
2944 case 'E': // Floating Point Constant
2945 case 'F': // Floating Point Constant
2946 case 's': // Relocatable Constant
2947 case 'p': // Address.
2948 case 'X': // Allow ANY value.
2949 case 'I': // Target registers.
2950 case 'J':
2951 case 'K':
2952 case 'L':
2953 case 'M':
2954 case 'N':
2955 case 'O':
2956 case 'P':
2957 case '<':
2958 case '>':
2959 return C_Other;
2963 if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
2964 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
2965 return C_Memory;
2966 return C_Register;
2968 return C_Unknown;
2971 /// Try to replace an X constraint, which matches anything, with another that
2972 /// has more specific requirements based on the type of the corresponding
2973 /// operand.
2974 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
2975 if (ConstraintVT.isInteger())
2976 return "r";
2977 if (ConstraintVT.isFloatingPoint())
2978 return "f"; // works for many targets
2979 return nullptr;
2982 /// Lower the specified operand into the Ops vector.
2983 /// If it is invalid, don't add anything to Ops.
2984 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2985 std::string &Constraint,
2986 std::vector<SDValue> &Ops,
2987 SelectionDAG &DAG) const {
2989 if (Constraint.length() > 1) return;
2991 char ConstraintLetter = Constraint[0];
2992 switch (ConstraintLetter) {
2993 default: break;
2994 case 'X': // Allows any operand; labels (basic block) use this.
2995 if (Op.getOpcode() == ISD::BasicBlock) {
2996 Ops.push_back(Op);
2997 return;
2999 LLVM_FALLTHROUGH;
3000 case 'i': // Simple Integer or Relocatable Constant
3001 case 'n': // Simple Integer
3002 case 's': { // Relocatable Constant
3003 // These operands are interested in values of the form (GV+C), where C may
3004 // be folded in as an offset of GV, or it may be explicitly added. Also, it
3005 // is possible and fine if either GV or C is missing.
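// E.g. "s"(&GV) supplies just GV, "i"(42) supplies just C, and an address
// like GV+4 reaches here as (add GV, C) and is folded into one
// TargetGlobalAddress with offset (illustrative operands, not tied to any
// particular caller).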
3006 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3007 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
3009 // If we have "(add GV, C)", pull out GV/C
3010 if (Op.getOpcode() == ISD::ADD) {
3011 C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3012 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
3013 if (!C || !GA) {
3014 C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
3015 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
3017 if (!C || !GA) {
3018 C = nullptr;
3019 GA = nullptr;
3023 // If we find a valid operand, map to the TargetXXX version so that the
3024 // value itself doesn't get selected.
3025 if (GA) { // Either &GV or &GV+C
3026 if (ConstraintLetter != 'n') {
3027 int64_t Offs = GA->getOffset();
3028 if (C) Offs += C->getZExtValue();
3029 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
3030 C ? SDLoc(C) : SDLoc(),
3031 Op.getValueType(), Offs));
3033 return;
3035 if (C) { // just C, no GV.
3036 // Simple constants are not allowed for 's'.
3037 if (ConstraintLetter != 's') {
3038 // gcc prints these as sign extended. Sign extend the value to 64 bits
3039 // now; without this it would get ZExt'd later in
3040 // ScheduleDAGSDNodes::EmitNode, which is very generic.
3041 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
3042 SDLoc(C), MVT::i64));
3044 return;
3046 break;
3051 std::pair<unsigned, const TargetRegisterClass *>
3052 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
3053 StringRef Constraint,
3054 MVT VT) const {
3055 if (Constraint.empty() || Constraint[0] != '{')
3056 return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
3057 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
3059 // Remove the braces from around the name.
3060 StringRef RegName(Constraint.data()+1, Constraint.size()-2);
3062 std::pair<unsigned, const TargetRegisterClass*> R =
3063 std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
3065 // Figure out which register class contains this reg.
3066 for (const TargetRegisterClass *RC : RI->regclasses()) {
3067 // If none of the value types for this register class are valid, we
3068 // can't use it. For example, 64-bit reg classes on 32-bit targets.
3069 if (!isLegalRC(*RI, *RC))
3070 continue;
3072 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
3073 I != E; ++I) {
3074 if (RegName.equals_lower(RI->getRegAsmName(*I))) {
3075 std::pair<unsigned, const TargetRegisterClass*> S =
3076 std::make_pair(*I, RC);
3078 // If this register class has the requested value type, return it,
3079 // otherwise keep searching and return the first class found
3080 // if no other is found which explicitly has the requested type.
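// E.g. a constraint like "{ax}" may name a register that belongs to several
// register classes; if VT is MVT::i16, a class that supports i16 wins, and
// otherwise the first legal class found serves as the fallback result
// (illustrative; register and class names are target-specific).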
3081 if (RI->isTypeLegalForClass(*RC, VT))
3082 return S;
3083 if (!R.second)
3084 R = S;
3085 }
3086 }
3087 }
3089 return R;
3090 }
3092 //===----------------------------------------------------------------------===//
3093 // Constraint Selection.
3094 //===----------------------------------------------------------------------===//
3095 /// Return true if this is an input operand that is a matching constraint like
3096 /// "4".
3097 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
3098 assert(!ConstraintCode.empty() && "No known constraint!");
3099 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
3100 }
3102 /// If this is an input matching constraint, this method returns the output
3103 /// operand it matches.
3104 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
3105 assert(!ConstraintCode.empty() && "No known constraint!");
3106 return atoi(ConstraintCode.c_str());
3107 }
3109 /// Split up the constraint string from the inline assembly value into the
3110 /// specific constraints and their prefixes, and also tie in the associated
3111 /// operand values.
3112 /// If this returns an empty vector, and if the constraint string itself
3113 /// isn't empty, there was an error parsing.
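// For illustration (not part of the upstream source), a call such as
//   call i32 asm "add $0, $1, $2", "=r,r,0"(i32 %a, i32 %b)
// produces three AsmOperandInfo entries: an isOutput operand with code "r",
// an isInput operand with code "r", and an isInput operand with the matching
// constraint "0", which ties that input to output operand 0.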
3114 TargetLowering::AsmOperandInfoVector
3115 TargetLowering::ParseConstraints(const DataLayout &DL,
3116 const TargetRegisterInfo *TRI,
3117 ImmutableCallSite CS) const {
3118 /// Information about all of the constraints.
3119 AsmOperandInfoVector ConstraintOperands;
3120 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
3121 unsigned maCount = 0; // Largest number of multiple alternative constraints.
3123 // Do a prepass over the constraints, canonicalizing them, and building up the
3124 // ConstraintOperands list.
3125 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
3126 unsigned ResNo = 0; // ResNo - The result number of the next output.
3128 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
3129 ConstraintOperands.emplace_back(std::move(CI));
3130 AsmOperandInfo &OpInfo = ConstraintOperands.back();
3132 // Update multiple alternative constraint count.
3133 if (OpInfo.multipleAlternatives.size() > maCount)
3134 maCount = OpInfo.multipleAlternatives.size();
3136 OpInfo.ConstraintVT = MVT::Other;
3138 // Compute the value type for each operand.
3139 switch (OpInfo.Type) {
3140 case InlineAsm::isOutput:
3141 // Indirect outputs just consume an argument.
3142 if (OpInfo.isIndirect) {
3143 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3144 break;
3145 }
3147 // The return value of the call is this value. As such, there is no
3148 // corresponding argument.
3149 assert(!CS.getType()->isVoidTy() &&
3150 "Bad inline asm!");
3151 if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
3152 OpInfo.ConstraintVT =
3153 getSimpleValueType(DL, STy->getElementType(ResNo));
3154 } else {
3155 assert(ResNo == 0 && "Asm only has one result!");
3156 OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
3157 }
3158 ++ResNo;
3159 break;
3160 case InlineAsm::isInput:
3161 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3162 break;
3163 case InlineAsm::isClobber:
3164 // Nothing to do.
3165 break;
3166 }
3168 if (OpInfo.CallOperandVal) {
3169 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
3170 if (OpInfo.isIndirect) {
3171 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
3172 if (!PtrTy)
3173 report_fatal_error("Indirect operand for inline asm not a pointer!");
3174 OpTy = PtrTy->getElementType();
3175 }
3177 // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
3178 if (StructType *STy = dyn_cast<StructType>(OpTy))
3179 if (STy->getNumElements() == 1)
3180 OpTy = STy->getElementType(0);
3182 // If OpTy is not a single value, it may be a struct/union that we
3183 // can tile with integers.
3184 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
3185 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
3186 switch (BitSize) {
3187 default: break;
3188 case 1:
3189 case 8:
3190 case 16:
3191 case 32:
3192 case 64:
3193 case 128:
3194 OpInfo.ConstraintVT =
3195 MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
3196 break;
3197 }
3198 } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
3199 unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
3200 OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
3201 } else {
3202 OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
3203 }
3204 }
3205 }
3207 // If we have multiple alternative constraints, select the best alternative.
3208 if (!ConstraintOperands.empty()) {
3209 if (maCount) {
3210 unsigned bestMAIndex = 0;
3211 int bestWeight = -1;
3212 // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
3213 int weight = -1;
3214 unsigned maIndex;
3215 // Compute the sums of the weights for each alternative, keeping track
3216 // of the best (highest weight) one so far.
3217 for (maIndex = 0; maIndex < maCount; ++maIndex) {
3218 int weightSum = 0;
3219 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3220 cIndex != eIndex; ++cIndex) {
3221 AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
3222 if (OpInfo.Type == InlineAsm::isClobber)
3223 continue;
3225 // If this is an output operand with a matching input operand,
3226 // look up the matching input. If their types mismatch, e.g. one
3227 // is an integer, the other is floating point, or their sizes are
3228 // different, flag the alternative as unmatchable (maCantMatch).
3229 if (OpInfo.hasMatchingInput()) {
3230 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
3231 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
3232 if ((OpInfo.ConstraintVT.isInteger() !=
3233 Input.ConstraintVT.isInteger()) ||
3234 (OpInfo.ConstraintVT.getSizeInBits() !=
3235 Input.ConstraintVT.getSizeInBits())) {
3236 weightSum = -1; // Can't match.
3237 break;
3238 }
3239 }
3240 }
3241 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
3242 if (weight == -1) {
3243 weightSum = -1;
3244 break;
3245 }
3246 weightSum += weight;
3247 }
3248 // Update best.
3249 if (weightSum > bestWeight) {
3250 bestWeight = weightSum;
3251 bestMAIndex = maIndex;
3252 }
3253 }
3255 // Now select chosen alternative in each constraint.
3256 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3257 cIndex != eIndex; ++cIndex) {
3258 AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
3259 if (cInfo.Type == InlineAsm::isClobber)
3260 continue;
3261 cInfo.selectAlternative(bestMAIndex);
3262 }
3263 }
3264 }
3266 // Check and hook up tied operands, choose constraint code to use.
3267 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3268 cIndex != eIndex; ++cIndex) {
3269 AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
3271 // If this is an output operand with a matching input operand, look up the
3272 // matching input. If their types mismatch, e.g. one is an integer, the
3273 // other is floating point, or their sizes are different, flag it as an
3274 // error.
3275 if (OpInfo.hasMatchingInput()) {
3276 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
3278 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
3279 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
3280 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
3281 OpInfo.ConstraintVT);
3282 std::pair<unsigned, const TargetRegisterClass *> InputRC =
3283 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
3284 Input.ConstraintVT);
3285 if ((OpInfo.ConstraintVT.isInteger() !=
3286 Input.ConstraintVT.isInteger()) ||
3287 (MatchRC.second != InputRC.second)) {
3288 report_fatal_error("Unsupported asm: input constraint"
3289 " with a matching output constraint of"
3290 " incompatible type!");
3296 return ConstraintOperands;
3297 }
3299 /// Return an integer indicating how general CT is.
3300 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
3301 switch (CT) {
3302 case TargetLowering::C_Other:
3303 case TargetLowering::C_Unknown:
3304 return 0;
3305 case TargetLowering::C_Register:
3306 return 1;
3307 case TargetLowering::C_RegisterClass:
3308 return 2;
3309 case TargetLowering::C_Memory:
3310 return 3;
3311 }
3312 llvm_unreachable("Invalid constraint type");
3313 }
3315 /// Examine constraint type and operand type and determine a weight value.
3316 /// This object must already have been set up with the operand type
3317 /// and the current alternative constraint selected.
3318 TargetLowering::ConstraintWeight
3319 TargetLowering::getMultipleConstraintMatchWeight(
3320 AsmOperandInfo &info, int maIndex) const {
3321 InlineAsm::ConstraintCodeVector *rCodes;
3322 if (maIndex >= (int)info.multipleAlternatives.size())
3323 rCodes = &info.Codes;
3324 else
3325 rCodes = &info.multipleAlternatives[maIndex].Codes;
3326 ConstraintWeight BestWeight = CW_Invalid;
3328 // Loop over the options, keeping track of the most general one.
3329 for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
3330 ConstraintWeight weight =
3331 getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
3332 if (weight > BestWeight)
3333 BestWeight = weight;
3334 }
3336 return BestWeight;
3337 }
3339 /// Examine constraint type and operand type and determine a weight value.
3340 /// This object must already have been set up with the operand type
3341 /// and the current alternative constraint selected.
3342 TargetLowering::ConstraintWeight
3343 TargetLowering::getSingleConstraintMatchWeight(
3344 AsmOperandInfo &info, const char *constraint) const {
3345 ConstraintWeight weight = CW_Invalid;
3346 Value *CallOperandVal = info.CallOperandVal;
3347 // If we don't have a value, we can't do a match,
3348 // but allow it at the lowest weight.
3349 if (!CallOperandVal)
3350 return CW_Default;
3351 // Look at the constraint type.
3352 switch (*constraint) {
3353 case 'i': // immediate integer.
3354 case 'n': // immediate integer with a known value.
3355 if (isa<ConstantInt>(CallOperandVal))
3356 weight = CW_Constant;
3357 break;
3358 case 's': // non-explicit integral immediate.
3359 if (isa<GlobalValue>(CallOperandVal))
3360 weight = CW_Constant;
3361 break;
3362 case 'E': // immediate float if host format.
3363 case 'F': // immediate float.
3364 if (isa<ConstantFP>(CallOperandVal))
3365 weight = CW_Constant;
3366 break;
3367 case '<': // memory operand with autodecrement.
3368 case '>': // memory operand with autoincrement.
3369 case 'm': // memory operand.
3370 case 'o': // offsettable memory operand
3371 case 'V': // non-offsettable memory operand
3372 weight = CW_Memory;
3373 break;
3374 case 'r': // general register.
3375 case 'g': // general register, memory operand or immediate integer.
3376 // note: Clang converts "g" to "imr".
3377 if (CallOperandVal->getType()->isIntegerTy())
3378 weight = CW_Register;
3379 break;
3380 case 'X': // any operand.
3381 default:
3382 weight = CW_Default;
3383 break;
3384 }
3385 return weight;
3386 }
3388 /// If there are multiple different constraints that we could pick for this
3389 /// operand (e.g. "imr") try to pick the 'best' one.
3390 /// This is somewhat tricky: constraints fall into four classes:
3391 /// Other -> immediates and magic values
3392 /// Register -> one specific register
3393 /// RegisterClass -> a group of regs
3394 /// Memory -> memory
3395 /// Ideally, we would pick the most specific constraint possible: if we have
3396 /// something that fits into a register, we would pick it. The problem here
3397 /// is that if we have something that could either be in a register or in
3398 /// memory, then use of the register could cause selection of *other*
3399 /// operands to fail: they might only succeed if we pick memory. Because of
3400 /// this the heuristic we use is:
3402 /// 1) If there is an 'other' constraint, and if the operand is valid for
3403 /// that constraint, use it. This makes us take advantage of 'i'
3404 /// constraints when available.
3405 /// 2) Otherwise, pick the most general constraint present. This prefers
3406 /// 'm' over 'r', for example.
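// For example (illustrative): given the multi-letter constraint "imr" and a
// constant operand such as (i32 42), rule 1 fires and 'i' is chosen, because
// LowerAsmOperandForConstraint can fold the constant directly. For a
// non-constant operand, rule 2 applies and 'm' (C_Memory, generality 3) is
// preferred over 'r' (C_RegisterClass, generality 2).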
3408 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
3409 const TargetLowering &TLI,
3410 SDValue Op, SelectionDAG *DAG) {
3411 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
3412 unsigned BestIdx = 0;
3413 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
3414 int BestGenerality = -1;
3416 // Loop over the options, keeping track of the most general one.
3417 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
3418 TargetLowering::ConstraintType CType =
3419 TLI.getConstraintType(OpInfo.Codes[i]);
3421 // If this is an 'other' constraint, see if the operand is valid for it.
3422 // For example, on X86 we might have an 'rI' constraint. If the operand
3423 // is an integer in the range [0..31] we want to use I (saving a load
3424 // of a register), otherwise we must use 'r'.
3425 if (CType == TargetLowering::C_Other && Op.getNode()) {
3426 assert(OpInfo.Codes[i].size() == 1 &&
3427 "Unhandled multi-letter 'other' constraint");
3428 std::vector<SDValue> ResultOps;
3429 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
3430 ResultOps, *DAG);
3431 if (!ResultOps.empty()) {
3432 BestType = CType;
3433 BestIdx = i;
3434 break;
3435 }
3436 }
3438 // Things with matching constraints can only be registers, per gcc
3439 // documentation. This mainly affects "g" constraints.
3440 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
3441 continue;
3443 // This constraint letter is more general than the previous one, use it.
3444 int Generality = getConstraintGenerality(CType);
3445 if (Generality > BestGenerality) {
3446 BestType = CType;
3447 BestIdx = i;
3448 BestGenerality = Generality;
3449 }
3450 }
3452 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
3453 OpInfo.ConstraintType = BestType;
3454 }
3456 /// Determines the constraint code and constraint type to use for the specific
3457 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
3458 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
3459 SDValue Op,
3460 SelectionDAG *DAG) const {
3461 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
3463 // Single-letter constraints ('r') are very common.
3464 if (OpInfo.Codes.size() == 1) {
3465 OpInfo.ConstraintCode = OpInfo.Codes[0];
3466 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3467 } else {
3468 ChooseConstraint(OpInfo, *this, Op, DAG);
3469 }
3471 // 'X' matches anything.
3472 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
3473 // Labels and constants are handled elsewhere ('X' is the only thing
3474 // that matches labels). For Functions, the type here is the type of
3475 // the result, which is not what we want to look at; leave them alone.
3476 Value *v = OpInfo.CallOperandVal;
3477 if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
3478 OpInfo.CallOperandVal = v;
3479 return;
3480 }
3482 // Otherwise, try to resolve it to something we know about by looking at
3483 // the actual operand type.
3484 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
3485 OpInfo.ConstraintCode = Repl;
3486 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3487 }
3488 }
3489 }
3491 /// Given an exact SDIV by a constant, create a multiplication
3492 /// with the multiplicative inverse of the constant.
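// A minimal sketch of the transform (illustrative): for an exact
// "sdiv i32 %x, 6", the divisor factors as 2 * 3, so this lowers to
//   %t = ashr exact i32 %x, 1     ; strip the power-of-two factor
//   %q = mul i32 %t, -1431655765  ; 0xAAAAAAAB == 3^-1 (mod 2^32)
// which is correct because every odd number is invertible mod 2^N.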
3493 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
3494 const SDLoc &dl, SelectionDAG &DAG,
3495 SmallVectorImpl<SDNode *> &Created) {
3496 SDValue Op0 = N->getOperand(0);
3497 SDValue Op1 = N->getOperand(1);
3498 EVT VT = N->getValueType(0);
3499 EVT SVT = VT.getScalarType();
3500 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
3501 EVT ShSVT = ShVT.getScalarType();
3503 bool UseSRA = false;
3504 SmallVector<SDValue, 16> Shifts, Factors;
3506 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
3507 if (C->isNullValue())
3508 return false;
3509 APInt Divisor = C->getAPIntValue();
3510 unsigned Shift = Divisor.countTrailingZeros();
3511 if (Shift) {
3512 Divisor.ashrInPlace(Shift);
3513 UseSRA = true;
3514 }
3515 // Calculate the multiplicative inverse, using Newton's method.
3516 APInt t;
3517 APInt Factor = Divisor;
3518 while ((t = Divisor * Factor) != 1)
3519 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
3520 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
3521 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
3522 return true;
3523 };
3525 // Collect all magic values from the build vector.
3526 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
3527 return SDValue();
3529 SDValue Shift, Factor;
3530 if (VT.isVector()) {
3531 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
3532 Factor = DAG.getBuildVector(VT, dl, Factors);
3533 } else {
3534 Shift = Shifts[0];
3535 Factor = Factors[0];
3536 }
3538 SDValue Res = Op0;
3540 // Shift the value upfront if it is even, so the LSB is one.
3541 if (UseSRA) {
3542 // TODO: For UDIV use SRL instead of SRA.
3543 SDNodeFlags Flags;
3544 Flags.setExact(true);
3545 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
3546 Created.push_back(Res.getNode());
3547 }
3549 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
3550 }
3552 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
3553 SelectionDAG &DAG,
3554 SmallVectorImpl<SDNode *> &Created) const {
3555 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3556 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3557 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
3558 return SDValue(N,0); // Lower SDIV as SDIV
3559 return SDValue();
3560 }
3562 /// Given an ISD::SDIV node expressing a divide by constant,
3563 /// return a DAG expression to select that will generate the same value by
3564 /// multiplying by a magic number.
3565 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
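// Worked example (illustrative): for "sdiv i32 %x, 7", Divisor.magic() gives
// m = 0x92492493 (negative) and s = 2, so d > 0 and m < 0 selects
// NumeratorFactor = 1, and the emitted sequence is roughly:
//   %q = mulhs i32 %x, 0x92492493
//   %q = add i32 %q, %x           ; add the numerator back
//   %q = sra i32 %q, 2            ; shift right by magics.s
//   %t = srl i32 %q, 31           ; extract the sign bit
//   %q = add i32 %q, %t           ; fix up rounding toward zero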
3566 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
3567 bool IsAfterLegalization,
3568 SmallVectorImpl<SDNode *> &Created) const {
3569 SDLoc dl(N);
3570 EVT VT = N->getValueType(0);
3571 EVT SVT = VT.getScalarType();
3572 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
3573 EVT ShSVT = ShVT.getScalarType();
3574 unsigned EltBits = VT.getScalarSizeInBits();
3576 // Check to see if we can do this.
3577 // FIXME: We should be more aggressive here.
3578 if (!isTypeLegal(VT))
3579 return SDValue();
3581 // If the sdiv has an 'exact' bit we can use a simpler lowering.
3582 if (N->getFlags().hasExact())
3583 return BuildExactSDIV(*this, N, dl, DAG, Created);
3585 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
3587 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
3588 if (C->isNullValue())
3589 return false;
3591 const APInt &Divisor = C->getAPIntValue();
3592 APInt::ms magics = Divisor.magic();
3593 int NumeratorFactor = 0;
3594 int ShiftMask = -1;
3596 if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
3597 // If d is +1/-1, we just multiply the numerator by +1/-1.
3598 NumeratorFactor = Divisor.getSExtValue();
3599 magics.m = 0;
3600 magics.s = 0;
3601 ShiftMask = 0;
3602 } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
3603 // If d > 0 and m < 0, add the numerator.
3604 NumeratorFactor = 1;
3605 } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
3606 // If d < 0 and m > 0, subtract the numerator.
3607 NumeratorFactor = -1;
3608 }
3610 MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
3611 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
3612 Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
3613 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
3614 return true;
3615 };
3617 SDValue N0 = N->getOperand(0);
3618 SDValue N1 = N->getOperand(1);
3620 // Collect the shifts / magic values from each element.
3621 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
3622 return SDValue();
3624 SDValue MagicFactor, Factor, Shift, ShiftMask;
3625 if (VT.isVector()) {
3626 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
3627 Factor = DAG.getBuildVector(VT, dl, Factors);
3628 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
3629 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
3630 } else {
3631 MagicFactor = MagicFactors[0];
3632 Factor = Factors[0];
3633 Shift = Shifts[0];
3634 ShiftMask = ShiftMasks[0];
3635 }
3637 // Multiply the numerator (operand 0) by the magic value.
3638 // FIXME: We should support doing a MUL in a wider type.
3639 SDValue Q;
3640 if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
3641 : isOperationLegalOrCustom(ISD::MULHS, VT))
3642 Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
3643 else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
3644 : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
3645 SDValue LoHi =
3646 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
3647 Q = SDValue(LoHi.getNode(), 1);
3648 } else
3649 return SDValue(); // No mulhs or equivalent.
3650 Created.push_back(Q.getNode());
3652 // (Optionally) Add/subtract the numerator using Factor.
3653 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
3654 Created.push_back(Factor.getNode());
3655 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
3656 Created.push_back(Q.getNode());
3658 // Shift right algebraic by shift value.
3659 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
3660 Created.push_back(Q.getNode());
3662 // Extract the sign bit, mask it and add it to the quotient.
3663 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
3664 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
3665 Created.push_back(T.getNode());
3666 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
3667 Created.push_back(T.getNode());
3668 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
3669 }
3671 /// Given an ISD::UDIV node expressing a divide by constant,
3672 /// return a DAG expression to select that will generate the same value by
3673 /// multiplying by a magic number.
3674 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
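// Worked example (illustrative): for "udiv i32 %x, 7", magicu() gives
// m = 0x24924925 with magics.a != 0, so the NPQ fixup path below is used:
//   %q = mulhu i32 %x, 0x24924925
//   %n = sub i32 %x, %q
//   %n = srl i32 %n, 1
//   %q = add i32 %n, %q
//   %q = srl i32 %q, 2            ; PostShift == magics.s - 1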
3675 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
3676 bool IsAfterLegalization,
3677 SmallVectorImpl<SDNode *> &Created) const {
3678 SDLoc dl(N);
3679 EVT VT = N->getValueType(0);
3680 EVT SVT = VT.getScalarType();
3681 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
3682 EVT ShSVT = ShVT.getScalarType();
3683 unsigned EltBits = VT.getScalarSizeInBits();
3685 // Check to see if we can do this.
3686 // FIXME: We should be more aggressive here.
3687 if (!isTypeLegal(VT))
3688 return SDValue();
3690 bool UseNPQ = false;
3691 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
3693 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
3694 if (C->isNullValue())
3695 return false;
3696 // FIXME: We should use a narrower constant when the upper
3697 // bits are known to be zero.
3698 APInt Divisor = C->getAPIntValue();
3699 APInt::mu magics = Divisor.magicu();
3700 unsigned PreShift = 0, PostShift = 0;
3702 // If the divisor is even, we can avoid using the expensive fixup by
3703 // shifting the divided value upfront.
3704 if (magics.a != 0 && !Divisor[0]) {
3705 PreShift = Divisor.countTrailingZeros();
3706 // Get magic number for the shifted divisor.
3707 magics = Divisor.lshr(PreShift).magicu(PreShift);
3708 assert(magics.a == 0 && "Should use cheap fixup now");
3709 }
3711 APInt Magic = magics.m;
3713 bool SelNPQ;
3714 if (magics.a == 0 || Divisor.isOneValue()) {
3715 assert(magics.s < Divisor.getBitWidth() &&
3716 "We shouldn't generate an undefined shift!");
3717 PostShift = magics.s;
3718 SelNPQ = false;
3719 } else {
3720 PostShift = magics.s - 1;
3721 SelNPQ = true;
3722 }
3724 PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
3725 MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
3726 NPQFactors.push_back(
3727 DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
3728 : APInt::getNullValue(EltBits),
3729 dl, SVT));
3730 PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
3731 UseNPQ |= SelNPQ;
3732 return true;
3733 };
3735 SDValue N0 = N->getOperand(0);
3736 SDValue N1 = N->getOperand(1);
3738 // Collect the shifts/magic values from each element.
3739 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
3740 return SDValue();
3742 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
3743 if (VT.isVector()) {
3744 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
3745 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
3746 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
3747 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
3748 } else {
3749 PreShift = PreShifts[0];
3750 MagicFactor = MagicFactors[0];
3751 PostShift = PostShifts[0];
3752 }
3754 SDValue Q = N0;
3755 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
3756 Created.push_back(Q.getNode());
3758 // FIXME: We should support doing a MUL in a wider type.
3759 auto GetMULHU = [&](SDValue X, SDValue Y) {
3760 if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
3761 : isOperationLegalOrCustom(ISD::MULHU, VT))
3762 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
3763 if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
3764 : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
3765 SDValue LoHi =
3766 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
3767 return SDValue(LoHi.getNode(), 1);
3768 }
3769 return SDValue(); // No mulhu or equivalent
3770 };
3772 // Multiply the numerator (operand 0) by the magic value.
3773 Q = GetMULHU(Q, MagicFactor);
3774 if (!Q)
3775 return SDValue();
3777 Created.push_back(Q.getNode());
3779 if (UseNPQ) {
3780 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
3781 Created.push_back(NPQ.getNode());
3783 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
3784 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
3785 if (VT.isVector())
3786 NPQ = GetMULHU(NPQ, NPQFactor);
3787 else
3788 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
3790 Created.push_back(NPQ.getNode());
3792 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
3793 Created.push_back(Q.getNode());
3794 }
3796 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
3797 Created.push_back(Q.getNode());
3799 SDValue One = DAG.getConstant(1, dl, VT);
3800 SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
3801 return DAG.getSelect(dl, VT, IsOne, N0, Q);
3802 }
3804 bool TargetLowering::
3805 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
3806 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3807 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
3808 "be a constant integer");
3809 return true;
3810 }
3812 return false;
3813 }
3815 //===----------------------------------------------------------------------===//
3816 // Legalization Utilities
3817 //===----------------------------------------------------------------------===//
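// A sketch of the decomposition used by expandMUL_LOHI below (illustrative):
// splitting i64 operands into i32 halves with K = 32,
//   LHS = LH * 2^K + LL,  RHS = RH * 2^K + RL
//   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^K + LH*RH * 2^(2K)
// The low product is formed with MULHU/UMUL_LOHI, and the cross and high
// terms are added into the upper half with explicit carry handling.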
3819 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
3820 SDValue LHS, SDValue RHS,
3821 SmallVectorImpl<SDValue> &Result,
3822 EVT HiLoVT, SelectionDAG &DAG,
3823 MulExpansionKind Kind, SDValue LL,
3824 SDValue LH, SDValue RL, SDValue RH) const {
3825 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
3826 Opcode == ISD::SMUL_LOHI);
3828 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
3829 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
3830 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
3831 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
3832 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
3833 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
3834 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
3835 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
3837 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
3838 return false;
3840 unsigned OuterBitSize = VT.getScalarSizeInBits();
3841 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
3842 unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
3843 unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
3845 // LL, LH, RL, and RH must be either all NULL or all set to a value.
3846 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
3847 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
3849 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
3850 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
3851 bool Signed) -> bool {
3852 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
3853 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
3854 Hi = SDValue(Lo.getNode(), 1);
3855 return true;
3856 }
3857 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
3858 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
3859 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
3860 return true;
3861 }
3862 return false;
3863 };
3865 SDValue Lo, Hi;
3867 if (!LL.getNode() && !RL.getNode() &&
3868 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
3869 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
3870 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
3871 }
3873 if (!LL.getNode())
3874 return false;
3876 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
3877 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
3878 DAG.MaskedValueIsZero(RHS, HighMask)) {
3879 // The inputs are both zero-extended.
3880 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
3881 Result.push_back(Lo);
3882 Result.push_back(Hi);
3883 if (Opcode != ISD::MUL) {
3884 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
3885 Result.push_back(Zero);
3886 Result.push_back(Zero);
3887 }
3888 return true;
3889 }
3890 }
3892 if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
3893 RHSSB > InnerBitSize) {
3894 // The input values are both sign-extended.
3895 // TODO non-MUL case?
3896 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
3897 Result.push_back(Lo);
3898 Result.push_back(Hi);
3899 return true;
3900 }
3901 }
3903 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
3904 EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
3905 if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
3906 // FIXME getShiftAmountTy does not always return a sensible result when VT
3907 // is an illegal type, and so the type may be too small to fit the shift
3908 // amount. Override it with i32. The shift will have to be legalized.
3909 ShiftAmountTy = MVT::i32;
3910 }
3911 SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
3913 if (!LH.getNode() && !RH.getNode() &&
3914 isOperationLegalOrCustom(ISD::SRL, VT) &&
3915 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
3916 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
3917 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
3918 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
3919 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
3920 }
3922 if (!LH.getNode())
3923 return false;
3925 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
3926 return false;
3928 Result.push_back(Lo);
3930 if (Opcode == ISD::MUL) {
3931 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
3932 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
3933 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
3934 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
3935 Result.push_back(Hi);
3936 return true;
3937 }
3939 // Compute the full width result.
3940 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
3941 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
3942 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
3943 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
3944 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
3945 };
3947 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
3948 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
3949 return false;
3951 // This is effectively the add part of a multiply-add of half-sized operands,
3952 // so it cannot overflow.
3953 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
3955 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
3956 return false;
3958 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
3959 Merge(Lo, Hi));
3961 SDValue Carry = Next.getValue(1);
3962 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3963 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
3965 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
3966 return false;
3968 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
3969 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
3970 Carry);
3971 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
3973 if (Opcode == ISD::SMUL_LOHI) {
3974 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
3975 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
3976 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
3978 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
3979 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
3980 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
3981 }
3983 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3984 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
3985 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3986 return true;
3987 }
3989 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
3990 SelectionDAG &DAG, MulExpansionKind Kind,
3991 SDValue LL, SDValue LH, SDValue RL,
3992 SDValue RH) const {
3993 SmallVector<SDValue, 2> Result;
3994 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
3995 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
3996 DAG, Kind, LL, LH, RL, RH);
3997 if (Ok) {
3998 assert(Result.size() == 2);
3999 Lo = Result[0];
4000 Hi = Result[1];
4001 }
4002 return Ok;
4003 }
4005 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
4006 SelectionDAG &DAG) const {
4007 EVT VT = Node->getOperand(0).getValueType();
4008 EVT NVT = Node->getValueType(0);
4009 SDLoc dl(SDValue(Node, 0));
4011 // FIXME: Only f32 to i64 conversions are supported.
4012 if (VT != MVT::f32 || NVT != MVT::i64)
4013 return false;
4015 // Expand f32 -> i64 conversion
4016 // This algorithm comes from compiler-rt's implementation of fixsfdi:
4017 // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
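// Worked example (illustrative): 5.0f has bit pattern 0x40A00000, so
// ExponentBits == 0x81 and Exponent == 129 - 127 == 2. The implicit-bit
// mantissa R == 0x00A00000 == 5 * 2^21, and since Exponent < 23 the
// select below computes R >> (23 - 2) == 5; the sign fixup is a no-op.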
4018 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
4019 VT.getSizeInBits());
4020 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
4021 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
4022 SDValue Bias = DAG.getConstant(127, dl, IntVT);
4023 SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
4024 IntVT);
4025 SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
4026 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
4028 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
4030 auto &DL = DAG.getDataLayout();
4031 SDValue ExponentBits = DAG.getNode(
4032 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
4033 DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
4034 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
4036 SDValue Sign = DAG.getNode(
4037 ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
4038 DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
4039 Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
4041 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
4042 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
4043 DAG.getConstant(0x00800000, dl, IntVT));
4045 R = DAG.getZExtOrTrunc(R, dl, NVT);
4047 R = DAG.getSelectCC(
4048 dl, Exponent, ExponentLoBit,
4049 DAG.getNode(ISD::SHL, dl, NVT, R,
4050 DAG.getZExtOrTrunc(
4051 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
4052 dl, getShiftAmountTy(IntVT, DL))),
4053 DAG.getNode(ISD::SRL, dl, NVT, R,
4054 DAG.getZExtOrTrunc(
4055 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
4056 dl, getShiftAmountTy(IntVT, DL))),
4057 ISD::SETGT);
4059 SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
4060 DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
4061 Sign);
4063 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
4064 DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
4065 return true;
4066 }
4068 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
4069 SelectionDAG &DAG) const {
4070 SDLoc SL(LD);
4071 SDValue Chain = LD->getChain();
4072 SDValue BasePTR = LD->getBasePtr();
4073 EVT SrcVT = LD->getMemoryVT();
4074 ISD::LoadExtType ExtType = LD->getExtensionType();
4076 unsigned NumElem = SrcVT.getVectorNumElements();
4078 EVT SrcEltVT = SrcVT.getScalarType();
4079 EVT DstEltVT = LD->getValueType(0).getScalarType();
4081 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
4082 assert(SrcEltVT.isByteSized());
4084 SmallVector<SDValue, 8> Vals;
4085 SmallVector<SDValue, 8> LoadChains;
4087 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4088 SDValue ScalarLoad =
4089 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
4090 LD->getPointerInfo().getWithOffset(Idx * Stride),
4091 SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
4092 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4094 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
4096 Vals.push_back(ScalarLoad.getValue(0));
4097 LoadChains.push_back(ScalarLoad.getValue(1));
4098 }
4100 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
4101 SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
4103 return DAG.getMergeValues({ Value, NewChain }, SL);
4104 }
4106 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
4107 SelectionDAG &DAG) const {
4108 SDLoc SL(ST);
4110 SDValue Chain = ST->getChain();
4111 SDValue BasePtr = ST->getBasePtr();
4112 SDValue Value = ST->getValue();
4113 EVT StVT = ST->getMemoryVT();
4115 // The type of the data we want to save
4116 EVT RegVT = Value.getValueType();
4117 EVT RegSclVT = RegVT.getScalarType();
4119 // The type of data as saved in memory.
4120 EVT MemSclVT = StVT.getScalarType();
4122 EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
4123 unsigned NumElem = StVT.getVectorNumElements();
4125 // A vector must always be stored in memory as-is, i.e. without any padding
4127 // between the elements, since various code depends on it, e.g. in the
4127 // handling of a bitcast of a vector type to int, which may be done with a
4128 // vector store followed by an integer load. A vector that does not have
4129 // elements that are byte-sized must therefore be stored as an integer
4130 // built out of the extracted vector elements.
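// For example (illustrative): storing <4 x i1> <1, 0, 1, 1> on a
// little-endian target packs the elements into one integer,
//   val = 1 | (0 << 1) | (1 << 2) | (1 << 3) == 0b1101,
// and emits a single i4 store of that value.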
4131 if (!MemSclVT.isByteSized()) {
4132 unsigned NumBits = StVT.getSizeInBits();
4133 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
4135 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
4137 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4138 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
4139 DAG.getConstant(Idx, SL, IdxVT));
4140 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
4141 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
4142 unsigned ShiftIntoIdx =
4143 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
4144 SDValue ShiftAmount =
4145 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
4146 SDValue ShiftedElt =
4147 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
4148 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
4149 }
4151 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
4152 ST->getAlignment(), ST->getMemOperand()->getFlags(),
4153 ST->getAAInfo());
4154 }
4156 // Store Stride in bytes
4157 unsigned Stride = MemSclVT.getSizeInBits() / 8;
4158 assert(Stride && "Zero stride!");
4159 // Extract each of the elements from the original vector and save them into
4160 // memory individually.
4161 SmallVector<SDValue, 8> Stores;
4162 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4163 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
4164 DAG.getConstant(Idx, SL, IdxVT));
4166 SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
4168 // This scalar TruncStore may be illegal, but we legalize it later.
4169 SDValue Store = DAG.getTruncStore(
4170 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
4171 MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
4172 ST->getMemOperand()->getFlags(), ST->getAAInfo());
4174 Stores.push_back(Store);
4175 }
4177 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
4178 }
4180 std::pair<SDValue, SDValue>
4181 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
4182 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
4183 "unaligned indexed loads not implemented!");
4184 SDValue Chain = LD->getChain();
4185 SDValue Ptr = LD->getBasePtr();
4186 EVT VT = LD->getValueType(0);
4187 EVT LoadedVT = LD->getMemoryVT();
4188 SDLoc dl(LD);
4189 auto &MF = DAG.getMachineFunction();
4191 if (VT.isFloatingPoint() || VT.isVector()) {
4192 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
4193 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
4194 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
4195 LoadedVT.isVector()) {
4196 // Scalarize the load and let the individual components be handled.
4197 SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
4198 if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
4199 return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
4200 return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
4201 }
4203 // Expand to a (misaligned) integer load of the same size,
4204 // then bitconvert to floating point or vector.
4205 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
4206 LD->getMemOperand());
4207 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
4208 if (LoadedVT != VT)
4209 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
4210 ISD::ANY_EXTEND, dl, VT, Result);
4212 return std::make_pair(Result, newLoad.getValue(1));
4213 }
4215 // Copy the value to an (aligned) stack slot using (unaligned) integer
4216 // loads and stores, then do a (aligned) load from the stack slot.
4217 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
4218 unsigned LoadedBytes = LoadedVT.getStoreSize();
4219 unsigned RegBytes = RegVT.getSizeInBits() / 8;
4220 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
4222 // Make sure the stack slot is also aligned for the register type.
4223 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
4224 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
4225 SmallVector<SDValue, 8> Stores;
4226 SDValue StackPtr = StackBase;
4227 unsigned Offset = 0;
4229 EVT PtrVT = Ptr.getValueType();
4230 EVT StackPtrVT = StackPtr.getValueType();
4232 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
4233 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
4235 // Do all but one copies using the full register width.
4236 for (unsigned i = 1; i < NumRegs; i++) {
4237 // Load one integer register's worth from the original location.
4238 SDValue Load = DAG.getLoad(
4239 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
4240 MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
4241 LD->getAAInfo());
4242 // Follow the load with a store to the stack slot. Remember the store.
4243 Stores.push_back(DAG.getStore(
4244 Load.getValue(1), dl, Load, StackPtr,
4245 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
4246 // Increment the pointers.
4247 Offset += RegBytes;
4249 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
4250 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
4251 }
4253 // The last copy may be partial. Do an extending load.
4254 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
4255 8 * (LoadedBytes - Offset));
4256 SDValue Load =
4257 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
4258 LD->getPointerInfo().getWithOffset(Offset), MemVT,
4259 MinAlign(LD->getAlignment(), Offset),
4260 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4261 // Follow the load with a store to the stack slot. Remember the store.
4262 // On big-endian machines this requires a truncating store to ensure
4263 // that the bits end up in the right place.
4264 Stores.push_back(DAG.getTruncStore(
4265 Load.getValue(1), dl, Load, StackPtr,
4266 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
4268 // The order of the stores doesn't matter - say it with a TokenFactor.
4269 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
4271 // Finally, perform the original load only redirected to the stack slot.
4272 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
4273 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
4274 LoadedVT);
4276 // Callers expect a MERGE_VALUES node.
4277 return std::make_pair(Load, TF);
4278 }
4280 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
4281 "Unaligned load of unsupported type.");
4283 // Compute the new VT that is half the size of the old one. This is an
4284 // integer MVT.
4285 unsigned NumBits = LoadedVT.getSizeInBits();
4286 EVT NewLoadedVT;
4287 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
4288 NumBits >>= 1;
4290 unsigned Alignment = LD->getAlignment();
4291 unsigned IncrementSize = NumBits / 8;
4292 ISD::LoadExtType HiExtType = LD->getExtensionType();
4294 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
4295 if (HiExtType == ISD::NON_EXTLOAD)
4296 HiExtType = ISD::ZEXTLOAD;
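// A sketch of the split (illustrative): an unaligned "load i32, align 2"
// on a little-endian target becomes
//   Lo = zextload i16 @ Ptr
//   Hi = HiExtType load i16 @ Ptr + 2
//   Result = or (shl Hi, 16), Lo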
4298 // Load the value in two parts
4299 SDValue Lo, Hi;
4300 if (DAG.getDataLayout().isLittleEndian()) {
4301 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
4302 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
4303 LD->getAAInfo());
4305 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4306 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
4307 LD->getPointerInfo().getWithOffset(IncrementSize),
4308 NewLoadedVT, MinAlign(Alignment, IncrementSize),
4309 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4310 } else {
4311 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
4312 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
4313 LD->getAAInfo());
4315 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4316 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
4317 LD->getPointerInfo().getWithOffset(IncrementSize),
4318 NewLoadedVT, MinAlign(Alignment, IncrementSize),
4319 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4320 }
4322 // Aggregate the two parts.
4323 SDValue ShiftAmount =
4324 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
4325 DAG.getDataLayout()));
4326 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
4327 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
4329 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
4330 Hi.getValue(1));
4332 return std::make_pair(Result, TF);
4333 }
4335 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
4336 SelectionDAG &DAG) const {
4337 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
4338 "unaligned indexed stores not implemented!");
4339 SDValue Chain = ST->getChain();
4340 SDValue Ptr = ST->getBasePtr();
4341 SDValue Val = ST->getValue();
4342 EVT VT = Val.getValueType();
4343 int Alignment = ST->getAlignment();
4344 auto &MF = DAG.getMachineFunction();
4345 EVT MemVT = ST->getMemoryVT();
4347 SDLoc dl(ST);
4348 if (MemVT.isFloatingPoint() || MemVT.isVector()) {
4349 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
4350 if (isTypeLegal(intVT)) {
4351 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
4352 MemVT.isVector()) {
4353 // Scalarize the store and let the individual components be handled.
4354 SDValue Result = scalarizeVectorStore(ST, DAG);
4356 return Result;
4357 }
4358 // Expand to a bitconvert of the value to the integer type of the
4359 // same size, then a (misaligned) int store.
4360 // FIXME: Does not handle truncating floating point stores!
4361 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
4362 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
4363 Alignment, ST->getMemOperand()->getFlags());
4364 return Result;
4365 }
4366 // Do an (aligned) store to a stack slot, then copy from the stack slot
4367 // to the final destination using (unaligned) integer loads and stores.
4368 EVT StoredVT = ST->getMemoryVT();
4369 MVT RegVT =
4370 getRegisterType(*DAG.getContext(),
4371 EVT::getIntegerVT(*DAG.getContext(),
4372 StoredVT.getSizeInBits()));
4373 EVT PtrVT = Ptr.getValueType();
4374 unsigned StoredBytes = StoredVT.getStoreSize();
4375 unsigned RegBytes = RegVT.getSizeInBits() / 8;
4376 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
4378 // Make sure the stack slot is also aligned for the register type.
4379 SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
4380 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
4382 // Perform the original store, only redirected to the stack slot.
4383 SDValue Store = DAG.getTruncStore(
4384 Chain, dl, Val, StackPtr,
4385 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
4387 EVT StackPtrVT = StackPtr.getValueType();
4389 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
4390 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
4391 SmallVector<SDValue, 8> Stores;
4392 unsigned Offset = 0;
4394 // Do all but one copies using the full register width.
4395 for (unsigned i = 1; i < NumRegs; i++) {
4396 // Load one integer register's worth from the stack slot.
4397 SDValue Load = DAG.getLoad(
4398 RegVT, dl, Store, StackPtr,
4399 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
4400 // Store it to the final location. Remember the store.
4401 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
4402 ST->getPointerInfo().getWithOffset(Offset),
4403 MinAlign(ST->getAlignment(), Offset),
4404 ST->getMemOperand()->getFlags()));
4405 // Increment the pointers.
4406 Offset += RegBytes;
4407 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
4408 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
4409 }
4411 // The last store may be partial. Do a truncating store. On big-endian
4412 // machines this requires an extending load from the stack slot to ensure
4413 // that the bits are in the right place.
4414 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
4415 8 * (StoredBytes - Offset));
4417 // Load from the stack slot.
4418 SDValue Load = DAG.getExtLoad(
4419 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
4420 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
4422 Stores.push_back(
4423 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
4424 ST->getPointerInfo().getWithOffset(Offset), MemVT,
4425 MinAlign(ST->getAlignment(), Offset),
4426 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
4427 // The order of the stores doesn't matter - say it with a TokenFactor.
4428 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
4429 return Result;
4430 }
4432 assert(ST->getMemoryVT().isInteger() &&
4433 !ST->getMemoryVT().isVector() &&
4434 "Unaligned store of unknown type.");
4435 // Get the half-size VT
4436 EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
4437 int NumBits = NewStoredVT.getSizeInBits();
4438 int IncrementSize = NumBits / 8;
4440 // Divide the stored value in two parts.
4441 SDValue ShiftAmount =
4442 DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
4443 DAG.getDataLayout()));
4444 SDValue Lo = Val;
4445 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
4447 // Store the two parts
4448 SDValue Store1, Store2;
4449 Store1 = DAG.getTruncStore(Chain, dl,
4450 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
4451 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
4452 ST->getMemOperand()->getFlags());
4454 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4455 Alignment = MinAlign(Alignment, IncrementSize);
4456 Store2 = DAG.getTruncStore(
4457 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
4458 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
4459 ST->getMemOperand()->getFlags(), ST->getAAInfo());
4461 SDValue Result =
4462 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
4463 return Result;
4464 }
4466 SDValue
4467 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
4468 const SDLoc &DL, EVT DataVT,
4469 SelectionDAG &DAG,
4470 bool IsCompressedMemory) const {
4471 SDValue Increment;
4472 EVT AddrVT = Addr.getValueType();
4473 EVT MaskVT = Mask.getValueType();
4474 assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
4475 "Incompatible types of Data and Mask");
4476 if (IsCompressedMemory) {
4477 // Incrementing the pointer according to number of '1's in the mask.
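// For example (illustrative): compress-storing v4i32 under the mask
// <1,1,0,1> advances the pointer by popcount(mask) * 4 == 12 bytes.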
4478 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
4479 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
4480 if (MaskIntVT.getSizeInBits() < 32) {
4481 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
4482 MaskIntVT = MVT::i32;
4483 }
4485 // Count '1's with POPCNT.
4486 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
4487 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
4488 // Scale is an element size in bytes.
4489 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
4490 AddrVT);
4491 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
4492 } else
4493 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
4495 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
4496 }
4498 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
4499 SDValue Idx,
4500 EVT VecVT,
4501 const SDLoc &dl) {
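// For example (illustrative): with an 8-element vector, a variable index is
// clamped as "Idx & 7" (power-of-two case below); with a 7-element vector it
// becomes "umin(Idx, 6)".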
4502 if (isa<ConstantSDNode>(Idx))
4503 return Idx;
4505 EVT IdxVT = Idx.getValueType();
4506 unsigned NElts = VecVT.getVectorNumElements();
4507 if (isPowerOf2_32(NElts)) {
4508 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
4509 Log2_32(NElts));
4510 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
4511 DAG.getConstant(Imm, dl, IdxVT));
4512 }
4514 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
4515 DAG.getConstant(NElts - 1, dl, IdxVT));
4516 }
4518 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
4519 SDValue VecPtr, EVT VecVT,
4520 SDValue Index) const {
4521 SDLoc dl(Index);
4522 // Make sure the index type is big enough to compute in.
4523 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
4525 EVT EltVT = VecVT.getVectorElementType();
4527 // Calculate the element offset and add it to the pointer.
4528 unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
4529 assert(EltSize * 8 == EltVT.getSizeInBits() &&
4530 "Converting bits to bytes lost precision");
4532 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
4534 EVT IdxVT = Index.getValueType();
4536 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
4537 DAG.getConstant(EltSize, dl, IdxVT));
4538 return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
4539 }
4541 //===----------------------------------------------------------------------===//
4542 // Implementation of Emulated TLS Model
4543 //===----------------------------------------------------------------------===//
4545 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
4546 SelectionDAG &DAG) const {
4547 // Access to the address of TLS variable xyz is lowered to a function call:
4548 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
4549 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4550 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
4551 SDLoc dl(GA);
4553 ArgListTy Args;
4554 ArgListEntry Entry;
4555 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
4556 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
4557 StringRef EmuTlsVarName(NameString);
4558 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
4559 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
4560 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
4561 Entry.Ty = VoidPtrType;
4562 Args.push_back(Entry);
4564 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
4566 TargetLowering::CallLoweringInfo CLI(DAG);
4567 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
4568 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
4569 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4571 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
4572 // At least for X86 targets; maybe good for other targets too?
4573 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4574 MFI.setAdjustsStack(true); // Is this only for X86 target?
4575 MFI.setHasCalls(true);
4577 assert((GA->getOffset() == 0) &&
4578 "Emulated TLS must have zero offset in GlobalAddressSDNode");
4579 return CallResult.first;
4580 }
4582 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
4583 SelectionDAG &DAG) const {
4584 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
4585 if (!isCtlzFast())
4586 return SDValue();
4587 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4588 SDLoc dl(Op);
4589 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
4590 if (C->isNullValue() && CC == ISD::SETEQ) {
4591 EVT VT = Op.getOperand(0).getValueType();
4592 SDValue Zext = Op.getOperand(0);
4593 if (VT.bitsLT(MVT::i32)) {
4594 VT = MVT::i32;
4595 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
4596 }
4597 unsigned Log2b = Log2_32(VT.getSizeInBits());
4598 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
4599 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
4600 DAG.getConstant(Log2b, dl, MVT::i32));
4601 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
4602 }
4603 }
4604 return SDValue();
4605 }
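// Worked example (illustrative): for i32, the rewrite above turns
// (seteq %x, 0) into (srl (ctlz %x), 5): ctlz(0) == 32 == 0b100000, so the
// shift produces 1, while any nonzero %x has ctlz(%x) <= 31, producing 0.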