lib/CodeGen/SelectionDAG/TargetLowering.cpp

   1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This implements the TargetLowering class.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/CodeGen/TargetLowering.h"
  14 #include "llvm/ADT/STLExtras.h"
  15 #include "llvm/CodeGen/CallingConvLower.h"
  16 #include "llvm/CodeGen/MachineFrameInfo.h"
  17 #include "llvm/CodeGen/MachineFunction.h"
  18 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  19 #include "llvm/CodeGen/MachineRegisterInfo.h"
  20 #include "llvm/CodeGen/SelectionDAG.h"
  21 #include "llvm/CodeGen/TargetRegisterInfo.h"
  22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  23 #include "llvm/IR/DataLayout.h"
  24 #include "llvm/IR/DerivedTypes.h"
  25 #include "llvm/IR/GlobalVariable.h"
  26 #include "llvm/IR/LLVMContext.h"
  27 #include "llvm/MC/MCAsmInfo.h"
  28 #include "llvm/MC/MCExpr.h"
  29 #include "llvm/Support/ErrorHandling.h"
  30 #include "llvm/Support/KnownBits.h"
  31 #include "llvm/Support/MathExtras.h"
  32 #include "llvm/Target/TargetLoweringObjectFile.h"
  33 #include "llvm/Target/TargetMachine.h"
  34 #include <cctype>
  35 using namespace llvm;
  36
  37 /// NOTE: The TargetMachine owns TLOF.
  38 TargetLowering::TargetLowering(const TargetMachine &tm)
  39     : TargetLoweringBase(tm) {}
  40
  41 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  42   return nullptr;
  43 }
  44
  45 bool TargetLowering::isPositionIndependent() const {
  46   return getTargetMachine().isPositionIndependent();
  47 }
  48
  49 /// Check whether a given call node is in tail position within its function. If
  50 /// so, it sets Chain to the input chain of the tail call.
  51 bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
  52                                           SDValue &Chain) const {
  53   const Function &F = DAG.getMachineFunction().getFunction();
  54
  55   // Conservatively require the attributes of the call to match those of
  56   // the return. Ignore NoAlias and NonNull because they don't affect the
  57   // call sequence.
  58   AttributeList CallerAttrs = F.getAttributes();
  59   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
  60           .removeAttribute(Attribute::NoAlias)
  61           .removeAttribute(Attribute::NonNull)
  62           .hasAttributes())
  63     return false;
  64
  65   // It's not safe to eliminate the sign / zero extension of the return value.
  66   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
  67       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
  68     return false;
  69
  70   // Check if the only use is a function return node.
  71   return isUsedByReturnOnly(Node, Chain);
  72 }
  73
  74 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
  75     const uint32_t *CallerPreservedMask,
  76     const SmallVectorImpl<CCValAssign> &ArgLocs,
  77     const SmallVectorImpl<SDValue> &OutVals) const {
  78   for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
  79     const CCValAssign &ArgLoc = ArgLocs[I];
  80     if (!ArgLoc.isRegLoc())
  81       continue;
  82     Register Reg = ArgLoc.getLocReg();
  83     // Only look at callee saved registers.
  84     if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
  85       continue;
  86     // Check that we pass the value used for the caller.
  87     // (We look for a CopyFromReg reading a virtual register that is used
  88     //  for the function live-in value of register Reg)
  89     SDValue Value = OutVals[I];
  90     if (Value->getOpcode() != ISD::CopyFromReg)
  91       return false;
  92     unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
  93     if (MRI.getLiveInPhysReg(ArgReg) != Reg)
  94       return false;
  95   }
  96   return true;
  97 }
  98
  99 /// Set CallLoweringInfo attribute flags based on a call instruction
 100 /// and called function attributes.
 101 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
 102                                                      unsigned ArgIdx) {
 103   IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
 104   IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
 105   IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
 106   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
 107   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
 108   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
 109   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
 110   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
 111   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
 112   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
 113   Alignment = Call->getParamAlignment(ArgIdx);
 114   ByValType = nullptr;
 115   if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
 116     ByValType = Call->getParamByValType(ArgIdx);
 117 }
 118
 119 /// Generate a libcall taking the given operands as arguments and returning a
 120 /// result of type RetVT.
 121 std::pair<SDValue, SDValue>
 122 TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
 123                             ArrayRef<SDValue> Ops,
 124                             MakeLibCallOptions CallOptions,
 125                             const SDLoc &dl) const {
 126   TargetLowering::ArgListTy Args;
 127   Args.reserve(Ops.size());
 128
 129   TargetLowering::ArgListEntry Entry;
 130   for (unsigned i = 0; i < Ops.size(); ++i) {
 131     SDValue NewOp = Ops[i];
 132     Entry.Node = NewOp;
 133     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
 134     Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
 135                                                  CallOptions.IsSExt);
 136     Entry.IsZExt = !Entry.IsSExt;
 137
 138     if (CallOptions.IsSoften &&
 139         !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
 140       Entry.IsSExt = Entry.IsZExt = false;
 141     }
 142     Args.push_back(Entry);
 143   }
 144
 145   if (LC == RTLIB::UNKNOWN_LIBCALL)
 146     report_fatal_error("Unsupported library call operation!");
 147   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
 148                                          getPointerTy(DAG.getDataLayout()));
 149
 150   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
 151   TargetLowering::CallLoweringInfo CLI(DAG);
 152   bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
 153   bool zeroExtend = !signExtend;
 154
 155   if (CallOptions.IsSoften &&
 156       !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
 157     signExtend = zeroExtend = false;
 158   }
 159
 160   CLI.setDebugLoc(dl)
 161       .setChain(DAG.getEntryNode())
 162       .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
 163       .setNoReturn(CallOptions.DoesNotReturn)
 164       .setDiscardResult(!CallOptions.IsReturnValueUsed)
 165       .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
 166       .setSExtResult(signExtend)
 167       .setZExtResult(zeroExtend);
 168   return LowerCallTo(CLI);
 169 }
 170
 171 bool
 172 TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
 173                                          unsigned Limit, uint64_t Size,
 174                                          unsigned DstAlign, unsigned SrcAlign,
 175                                          bool IsMemset,
 176                                          bool ZeroMemset,
 177                                          bool MemcpyStrSrc,
 178                                          bool AllowOverlap,
 179                                          unsigned DstAS, unsigned SrcAS,
 180                                          const AttributeList &FuncAttributes) const {
 181   // If 'SrcAlign' is zero, that means the memory operation does not need to
 182   // load the value, i.e. memset or memcpy from constant string. Otherwise,
 183   // it's the inferred alignment of the source. 'DstAlign', on the other hand,
 184   // is the specified alignment of the memory operation. If it is zero, that
 185   // means it's possible to change the alignment of the destination.
 186   // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
 187   // not need to be loaded.
 188   if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
 189     return false;
 190
 191   EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
 192                                IsMemset, ZeroMemset, MemcpyStrSrc,
 193                                FuncAttributes);
 194
 195   if (VT == MVT::Other) {
 196     // Use the largest integer type whose alignment constraints are satisfied.
 197     // We only need to check DstAlign here as SrcAlign is always greater or
 198     // equal to DstAlign (or zero).
 199     VT = MVT::i64;
 200     while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
 201            !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
 202       VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
 203     assert(VT.isInteger());
 204
 205     // Find the largest legal integer type.
 206     MVT LVT = MVT::i64;
 207     while (!isTypeLegal(LVT))
 208       LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
 209     assert(LVT.isInteger());
 210
 211     // If the type we've chosen is larger than the largest legal integer type
 212     // then use that instead.
 213     if (VT.bitsGT(LVT))
 214       VT = LVT;
 215   }
 216
 217   unsigned NumMemOps = 0;
 218   while (Size != 0) {
 219     unsigned VTSize = VT.getSizeInBits() / 8;
 220     while (VTSize > Size) {
 221       // For now, only use non-vector load / store's for the left-over pieces.
 222       EVT NewVT = VT;
 223       unsigned NewVTSize;
 224
 225       bool Found = false;
 226       if (VT.isVector() || VT.isFloatingPoint()) {
 227         NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
 228         if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
 229             isSafeMemOpType(NewVT.getSimpleVT()))
 230           Found = true;
 231         else if (NewVT == MVT::i64 &&
 232                  isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
 233                  isSafeMemOpType(MVT::f64)) {
 234           // i64 is usually not legal on 32-bit targets, but f64 may be.
 235           NewVT = MVT::f64;
 236           Found = true;
 237         }
 238       }
 239
 240       if (!Found) {
 241         do {
 242           NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
 243           if (NewVT == MVT::i8)
 244             break;
 245         } while (!isSafeMemOpType(NewVT.getSimpleVT()));
 246       }
 247       NewVTSize = NewVT.getSizeInBits() / 8;
 248
 249       // If the new VT cannot cover all of the remaining bits, then consider
 250       // issuing a (or a pair of) unaligned and overlapping load / store.
 251       bool Fast;
 252       if (NumMemOps && AllowOverlap && NewVTSize < Size &&
 253           allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
 254                                          MachineMemOperand::MONone, &Fast) &&
 255           Fast)
 256         VTSize = Size;
 257       else {
 258         VT = NewVT;
 259         VTSize = NewVTSize;
 260       }
 261     }
 262
 263     if (++NumMemOps > Limit)
 264       return false;
 265
 266     MemOps.push_back(VT);
 267     Size -= VTSize;
 268   }
 269
 270   return true;
 271 }
 272
 273 /// Soften the operands of a comparison. This code is shared among BR_CC,
 274 /// SELECT_CC, and SETCC handlers.
 275 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
 276                                          SDValue &NewLHS, SDValue &NewRHS,
 277                                          ISD::CondCode &CCCode,
 278                                          const SDLoc &dl, const SDValue OldLHS,
 279                                          const SDValue OldRHS) const {
 280   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
 281          && "Unsupported setcc type!");
 282
 283   // Expand into one or more soft-fp libcall(s).
 284   RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
 285   bool ShouldInvertCC = false;
 286   switch (CCCode) {
 287   case ISD::SETEQ:
 288   case ISD::SETOEQ:
 289     LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 290           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 291           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 292     break;
 293   case ISD::SETNE:
 294   case ISD::SETUNE:
 295     LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
 296           (VT == MVT::f64) ? RTLIB::UNE_F64 :
 297           (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
 298     break;
 299   case ISD::SETGE:
 300   case ISD::SETOGE:
 301     LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 302           (VT == MVT::f64) ? RTLIB::OGE_F64 :
 303           (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 304     break;
 305   case ISD::SETLT:
 306   case ISD::SETOLT:
 307     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 308           (VT == MVT::f64) ? RTLIB::OLT_F64 :
 309           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 310     break;
 311   case ISD::SETLE:
 312   case ISD::SETOLE:
 313     LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 314           (VT == MVT::f64) ? RTLIB::OLE_F64 :
 315           (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 316     break;
 317   case ISD::SETGT:
 318   case ISD::SETOGT:
 319     LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 320           (VT == MVT::f64) ? RTLIB::OGT_F64 :
 321           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 322     break;
 323   case ISD::SETUO:
 324     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 325           (VT == MVT::f64) ? RTLIB::UO_F64 :
 326           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 327     break;
 328   case ISD::SETO:
 329     LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
 330           (VT == MVT::f64) ? RTLIB::O_F64 :
 331           (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
 332     break;
 333   case ISD::SETONE:
 334     // SETONE = SETOLT | SETOGT
 335     LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 336           (VT == MVT::f64) ? RTLIB::OLT_F64 :
 337           (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 338     LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 339           (VT == MVT::f64) ? RTLIB::OGT_F64 :
 340           (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 341     break;
 342   case ISD::SETUEQ:
 343     LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
 344           (VT == MVT::f64) ? RTLIB::UO_F64 :
 345           (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
 346     LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
 347           (VT == MVT::f64) ? RTLIB::OEQ_F64 :
 348           (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
 349     break;
 350   default:
 351     // Invert CC for unordered comparisons
 352     ShouldInvertCC = true;
 353     switch (CCCode) {
 354     case ISD::SETULT:
 355       LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
 356             (VT == MVT::f64) ? RTLIB::OGE_F64 :
 357             (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
 358       break;
 359     case ISD::SETULE:
 360       LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
 361             (VT == MVT::f64) ? RTLIB::OGT_F64 :
 362             (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
 363       break;
 364     case ISD::SETUGT:
 365       LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
 366             (VT == MVT::f64) ? RTLIB::OLE_F64 :
 367             (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
 368       break;
 369     case ISD::SETUGE:
 370       LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
 371             (VT == MVT::f64) ? RTLIB::OLT_F64 :
 372             (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
 373       break;
 374     default: llvm_unreachable("Do not know how to soften this setcc!");
 375     }
 376   }
 377
 378   // Use the target specific return value for comparions lib calls.
 379   EVT RetVT = getCmpLibcallReturnType();
 380   SDValue Ops[2] = {NewLHS, NewRHS};
 381   TargetLowering::MakeLibCallOptions CallOptions;
 382   EVT OpsVT[2] = { OldLHS.getValueType(),
 383                    OldRHS.getValueType() };
 384   CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
 385   NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
 386   NewRHS = DAG.getConstant(0, dl, RetVT);
 387
 388   CCCode = getCmpLibcallCC(LC1);
 389   if (ShouldInvertCC)
 390     CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);
 391
 392   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
 393     SDValue Tmp = DAG.getNode(
 394         ISD::SETCC, dl,
 395         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
 396         NewLHS, NewRHS, DAG.getCondCode(CCCode));
 397     NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
 398     NewLHS = DAG.getNode(
 399         ISD::SETCC, dl,
 400         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
 401         NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
 402     NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
 403     NewRHS = SDValue();
 404   }
 405 }
 406
 407 /// Return the entry encoding for a jump table in the current function. The
 408 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
 409 unsigned TargetLowering::getJumpTableEncoding() const {
 410   // In non-pic modes, just use the address of a block.
 411   if (!isPositionIndependent())
 412     return MachineJumpTableInfo::EK_BlockAddress;
 413
 414   // In PIC mode, if the target supports a GPRel32 directive, use it.
 415   if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
 416     return MachineJumpTableInfo::EK_GPRel32BlockAddress;
 417
 418   // Otherwise, use a label difference.
 419   return MachineJumpTableInfo::EK_LabelDifference32;
 420 }
 421
 422 SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
 423                                                  SelectionDAG &DAG) const {
 424   // If our PIC model is GP relative, use the global offset table as the base.
 425   unsigned JTEncoding = getJumpTableEncoding();
 426
 427   if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
 428       (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
 429     return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
 430
 431   return Table;
 432 }
 433
 434 /// This returns the relocation base for the given PIC jumptable, the same as
 435 /// getPICJumpTableRelocBase, but as an MCExpr.
 436 const MCExpr *
 437 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
 438                                              unsigned JTI,MCContext &Ctx) const{
 439   // The normal PIC reloc base is the label at the start of the jump table.
 440   return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
 441 }
 442
 443 bool
 444 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 445   const TargetMachine &TM = getTargetMachine();
 446   const GlobalValue *GV = GA->getGlobal();
 447
 448   // If the address is not even local to this DSO we will have to load it from
 449   // a got and then add the offset.
 450   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
 451     return false;
 452
 453   // If the code is position independent we will have to add a base register.
 454   if (isPositionIndependent())
 455     return false;
 456
 457   // Otherwise we can do it.
 458   return true;
 459 }
 460
 461 //===----------------------------------------------------------------------===//
 462 //  Optimization Methods
 463 //===----------------------------------------------------------------------===//
 464
 465 /// If the specified instruction has a constant integer operand and there are
 466 /// bits set in that constant that are not demanded, then clear those bits and
 467 /// return true.
 468 bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
 469                                             TargetLoweringOpt &TLO) const {
 470   SDLoc DL(Op);
 471   unsigned Opcode = Op.getOpcode();
 472
 473   // Do target-specific constant optimization.
 474   if (targetShrinkDemandedConstant(Op, Demanded, TLO))
 475     return TLO.New.getNode();
 476
 477   // FIXME: ISD::SELECT, ISD::SELECT_CC
 478   switch (Opcode) {
 479   default:
 480     break;
 481   case ISD::XOR:
 482   case ISD::AND:
 483   case ISD::OR: {
 484     auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
 485     if (!Op1C)
 486       return false;
 487
 488     // If this is a 'not' op, don't touch it because that's a canonical form.
 489     const APInt &C = Op1C->getAPIntValue();
 490     if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
 491       return false;
 492
 493     if (!C.isSubsetOf(Demanded)) {
 494       EVT VT = Op.getValueType();
 495       SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
 496       SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
 497       return TLO.CombineTo(Op, NewOp);
 498     }
 499
 500     break;
 501   }
 502   }
 503
 504   return false;
 505 }
 506
 507 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
 508 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
 509 /// generalized for targets with other types of implicit widening casts.
 510 bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
 511                                       const APInt &Demanded,
 512                                       TargetLoweringOpt &TLO) const {
 513   assert(Op.getNumOperands() == 2 &&
 514          "ShrinkDemandedOp only supports binary operators!");
 515   assert(Op.getNode()->getNumValues() == 1 &&
 516          "ShrinkDemandedOp only supports nodes with one result!");
 517
 518   SelectionDAG &DAG = TLO.DAG;
 519   SDLoc dl(Op);
 520
 521   // Early return, as this function cannot handle vector types.
 522   if (Op.getValueType().isVector())
 523     return false;
 524
 525   // Don't do this if the node has another user, which may require the
 526   // full value.
 527   if (!Op.getNode()->hasOneUse())
 528     return false;
 529
 530   // Search for the smallest integer type with free casts to and from
 531   // Op's type. For expedience, just check power-of-2 integer types.
 532   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 533   unsigned DemandedSize = Demanded.getActiveBits();
 534   unsigned SmallVTBits = DemandedSize;
 535   if (!isPowerOf2_32(SmallVTBits))
 536     SmallVTBits = NextPowerOf2(SmallVTBits);
 537   for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
 538     EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
 539     if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
 540         TLI.isZExtFree(SmallVT, Op.getValueType())) {
 541       // We found a type with free casts.
 542       SDValue X = DAG.getNode(
 543           Op.getOpcode(), dl, SmallVT,
 544           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
 545           DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
 546       assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
 547       SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
 548       return TLO.CombineTo(Op, Z);
 549     }
 550   }
 551   return false;
 552 }
 553
 554 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 555                                           DAGCombinerInfo &DCI) const {
 556   SelectionDAG &DAG = DCI.DAG;
 557   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
 558                         !DCI.isBeforeLegalizeOps());
 559   KnownBits Known;
 560
 561   bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
 562   if (Simplified) {
 563     DCI.AddToWorklist(Op.getNode());
 564     DCI.CommitTargetLoweringOpt(TLO);
 565   }
 566   return Simplified;
 567 }
 568
 569 bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 570                                           KnownBits &Known,
 571                                           TargetLoweringOpt &TLO,
 572                                           unsigned Depth,
 573                                           bool AssumeSingleUse) const {
 574   EVT VT = Op.getValueType();
 575   APInt DemandedElts = VT.isVector()
 576                            ? APInt::getAllOnesValue(VT.getVectorNumElements())
 577                            : APInt(1, 1);
 578   return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
 579                               AssumeSingleUse);
 580 }
 581
 582 // TODO: Can we merge SelectionDAG::GetDemandedBits into this?
 583 // TODO: Under what circumstances can we create nodes? Constant folding?
 584 SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
 585     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
 586     SelectionDAG &DAG, unsigned Depth) const {
 587   // Limit search depth.
 588   if (Depth >= 6)
 589     return SDValue();
 590
 591   // Ignore UNDEFs.
 592   if (Op.isUndef())
 593     return SDValue();
 594
 595   // Not demanding any bits/elts from Op.
 596   if (DemandedBits == 0 || DemandedElts == 0)
 597     return DAG.getUNDEF(Op.getValueType());
 598
 599   unsigned NumElts = DemandedElts.getBitWidth();
 600   KnownBits LHSKnown, RHSKnown;
 601   switch (Op.getOpcode()) {
 602   case ISD::BITCAST: {
 603     SDValue Src = peekThroughBitcasts(Op.getOperand(0));
 604     EVT SrcVT = Src.getValueType();
 605     EVT DstVT = Op.getValueType();
 606     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
 607     unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
 608
 609     if (NumSrcEltBits == NumDstEltBits)
 610       if (SDValue V = SimplifyMultipleUseDemandedBits(
 611               Src, DemandedBits, DemandedElts, DAG, Depth + 1))
 612         return DAG.getBitcast(DstVT, V);
 613
 614     // TODO - bigendian once we have test coverage.
 615     if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
 616         DAG.getDataLayout().isLittleEndian()) {
 617       unsigned Scale = NumDstEltBits / NumSrcEltBits;
 618       unsigned NumSrcElts = SrcVT.getVectorNumElements();
 619       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
 620       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
 621       for (unsigned i = 0; i != Scale; ++i) {
 622         unsigned Offset = i * NumSrcEltBits;
 623         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
 624         if (!Sub.isNullValue()) {
 625           DemandedSrcBits |= Sub;
 626           for (unsigned j = 0; j != NumElts; ++j)
 627             if (DemandedElts[j])
 628               DemandedSrcElts.setBit((j * Scale) + i);
 629         }
 630       }
 631
 632       if (SDValue V = SimplifyMultipleUseDemandedBits(
 633               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 634         return DAG.getBitcast(DstVT, V);
 635     }
 636
 637     // TODO - bigendian once we have test coverage.
 638     if ((NumSrcEltBits % NumDstEltBits) == 0 &&
 639         DAG.getDataLayout().isLittleEndian()) {
 640       unsigned Scale = NumSrcEltBits / NumDstEltBits;
 641       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
 642       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
 643       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
 644       for (unsigned i = 0; i != NumElts; ++i)
 645         if (DemandedElts[i]) {
 646           unsigned Offset = (i % Scale) * NumDstEltBits;
 647           DemandedSrcBits.insertBits(DemandedBits, Offset);
 648           DemandedSrcElts.setBit(i / Scale);
 649         }
 650
 651       if (SDValue V = SimplifyMultipleUseDemandedBits(
 652               Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
 653         return DAG.getBitcast(DstVT, V);
 654     }
 655
 656     break;
 657   }
 658   case ISD::AND: {
 659     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 660     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 661
 662     // If all of the demanded bits are known 1 on one side, return the other.
 663     // These bits cannot contribute to the result of the 'and' in this
 664     // context.
 665     if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
 666       return Op.getOperand(0);
 667     if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
 668       return Op.getOperand(1);
 669     break;
 670   }
 671   case ISD::OR: {
 672     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 673     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 674
 675     // If all of the demanded bits are known zero on one side, return the
 676     // other.  These bits cannot contribute to the result of the 'or' in this
 677     // context.
 678     if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
 679       return Op.getOperand(0);
 680     if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
 681       return Op.getOperand(1);
 682     break;
 683   }
 684   case ISD::XOR: {
 685     LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
 686     RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
 687
 688     // If all of the demanded bits are known zero on one side, return the
 689     // other.
 690     if (DemandedBits.isSubsetOf(RHSKnown.Zero))
 691       return Op.getOperand(0);
 692     if (DemandedBits.isSubsetOf(LHSKnown.Zero))
 693       return Op.getOperand(1);
 694     break;
 695   }
 696   case ISD::SIGN_EXTEND_INREG: {
 697     // If none of the extended bits are demanded, eliminate the sextinreg.
 698     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
 699     if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
 700       return Op.getOperand(0);
 701     break;
 702   }
 703   case ISD::INSERT_VECTOR_ELT: {
 704     // If we don't demand the inserted element, return the base vector.
 705     SDValue Vec = Op.getOperand(0);
 706     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
 707     EVT VecVT = Vec.getValueType();
 708     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
 709         !DemandedElts[CIdx->getZExtValue()])
 710       return Vec;
 711     break;
 712   }
 713   case ISD::VECTOR_SHUFFLE: {
 714     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
 715
 716     // If all the demanded elts are from one operand and are inline,
 717     // then we can use the operand directly.
 718     bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
 719     for (unsigned i = 0; i != NumElts; ++i) {
 720       int M = ShuffleMask[i];
 721       if (M < 0 || !DemandedElts[i])
 722         continue;
 723       AllUndef = false;
 724       IdentityLHS &= (M == (int)i);
 725       IdentityRHS &= ((M - NumElts) == i);
 726     }
 727
 728     if (AllUndef)
 729       return DAG.getUNDEF(Op.getValueType());
 730     if (IdentityLHS)
 731       return Op.getOperand(0);
 732     if (IdentityRHS)
 733       return Op.getOperand(1);
 734     break;
 735   }
 736   default:
 737     if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
 738       if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
 739               Op, DemandedBits, DemandedElts, DAG, Depth))
 740         return V;
 741     break;
 742   }
 743   return SDValue();
 744 }
 745
 746 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
 747 /// result of Op are ever used downstream. If we can use this information to
 748 /// simplify Op, create a new simplified DAG node and return true, recording the
 749 /// original and new nodes in TLO.Old and TLO.New. Otherwise, analyze the
 750 /// expression, return false, and store the known bits of the expression in
 751 /// Known (used to simplify the caller). The Known bits may only be accurate for
 752 /// those bits in the OriginalDemandedBits and OriginalDemandedElts.
 753 bool TargetLowering::SimplifyDemandedBits(
 754     SDValue Op, const APInt &OriginalDemandedBits,
 755     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
 756     unsigned Depth, bool AssumeSingleUse) const {
 757   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
 758   assert(Op.getScalarValueSizeInBits() == BitWidth &&
 759          "Mask size mismatches value type size!");
 760
 761   unsigned NumElts = OriginalDemandedElts.getBitWidth();
 762   assert((!Op.getValueType().isVector() ||
 763           NumElts == Op.getValueType().getVectorNumElements()) &&
 764          "Unexpected vector size");
 765
 766   APInt DemandedBits = OriginalDemandedBits;
 767   APInt DemandedElts = OriginalDemandedElts;
 768   SDLoc dl(Op);
 769   auto &DL = TLO.DAG.getDataLayout();
 770
 771   // Don't know anything.
 772   Known = KnownBits(BitWidth);
 773
 774   // Undef operand.
 775   if (Op.isUndef())
 776     return false;
 777
 778   if (Op.getOpcode() == ISD::Constant) {
 779     // We know all of the bits for a constant!
 780     Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
 781     Known.Zero = ~Known.One;
 782     return false;
 783   }
 784
 785   // Other users may use these bits.
 786   EVT VT = Op.getValueType();
 787   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
 788     if (Depth != 0) {
 789       // If not at the root, just compute the Known bits to
 790       // simplify things downstream.
 791       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
 792       return false;
 793     }
 794     // If this is the root being simplified, allow it to have multiple uses,
 795     // just set the DemandedBits/Elts to all bits.
 796     DemandedBits = APInt::getAllOnesValue(BitWidth);
 797     DemandedElts = APInt::getAllOnesValue(NumElts);
 798   } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
 799     // Not demanding any bits/elts from Op.
 800     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
 801   } else if (Depth >= 6) { // Limit search depth.
 802     return false;
 803   }
 804
 805   KnownBits Known2, KnownOut;
 806   switch (Op.getOpcode()) {
 807   case ISD::SCALAR_TO_VECTOR: {
 808     if (!DemandedElts[0])
 809       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
 810
 811     KnownBits SrcKnown;
 812     SDValue Src = Op.getOperand(0);
 813     unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
 814     APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
 815     if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
 816       return true;
 817     Known = SrcKnown.zextOrTrunc(BitWidth, false);
 818     break;
 819   }
 820   case ISD::BUILD_VECTOR:
 821     // Collect the known bits that are shared by every demanded element.
 822     // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
 823     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
 824     return false; // Don't fall through, will infinitely loop.
 825   case ISD::LOAD: {
 826     LoadSDNode *LD = cast<LoadSDNode>(Op);
 827     if (getTargetConstantFromLoad(LD)) {
 828       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
 829       return false; // Don't fall through, will infinitely loop.
 830     }
 831     break;
 832   }
 833   case ISD::INSERT_VECTOR_ELT: {
 834     SDValue Vec = Op.getOperand(0);
 835     SDValue Scl = Op.getOperand(1);
 836     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
 837     EVT VecVT = Vec.getValueType();
 838
 839     // If index isn't constant, assume we need all vector elements AND the
 840     // inserted element.
 841     APInt DemandedVecElts(DemandedElts);
 842     if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
 843       unsigned Idx = CIdx->getZExtValue();
 844       DemandedVecElts.clearBit(Idx);
 845
 846       // Inserted element is not required.
 847       if (!DemandedElts[Idx])
 848         return TLO.CombineTo(Op, Vec);
 849     }
 850
 851     KnownBits KnownScl;
 852     unsigned NumSclBits = Scl.getScalarValueSizeInBits();
 853     APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
 854     if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
 855       return true;
 856
 857     Known = KnownScl.zextOrTrunc(BitWidth, false);
 858
 859     KnownBits KnownVec;
 860     if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
 861                              Depth + 1))
 862       return true;
 863
 864     if (!!DemandedVecElts) {
 865       Known.One &= KnownVec.One;
 866       Known.Zero &= KnownVec.Zero;
 867     }
 868
 869     return false;
 870   }
 871   case ISD::INSERT_SUBVECTOR: {
 872     SDValue Base = Op.getOperand(0);
 873     SDValue Sub = Op.getOperand(1);
 874     EVT SubVT = Sub.getValueType();
 875     unsigned NumSubElts = SubVT.getVectorNumElements();
 876
 877     // If index isn't constant, assume we need the original demanded base
 878     // elements and ALL the inserted subvector elements.
 879     APInt BaseElts = DemandedElts;
 880     APInt SubElts = APInt::getAllOnesValue(NumSubElts);
 881     if (isa<ConstantSDNode>(Op.getOperand(2))) {
 882       const APInt &Idx = Op.getConstantOperandAPInt(2);
 883       if (Idx.ule(NumElts - NumSubElts)) {
 884         unsigned SubIdx = Idx.getZExtValue();
 885         SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
 886         BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
 887       }
 888     }
 889
 890     KnownBits KnownSub, KnownBase;
 891     if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
 892                              Depth + 1))
 893       return true;
 894     if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
 895                              Depth + 1))
 896       return true;
 897
 898     Known.Zero.setAllBits();
 899     Known.One.setAllBits();
 900     if (!!SubElts) {
 901         Known.One &= KnownSub.One;
 902         Known.Zero &= KnownSub.Zero;
 903     }
 904     if (!!BaseElts) {
 905         Known.One &= KnownBase.One;
 906         Known.Zero &= KnownBase.Zero;
 907     }
 908     break;
 909   }
 910   case ISD::EXTRACT_SUBVECTOR: {
 911     // If index isn't constant, assume we need all the source vector elements.
 912     SDValue Src = Op.getOperand(0);
 913     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
 914     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
 915     APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
 916     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
 917       // Offset the demanded elts by the subvector index.
 918       uint64_t Idx = SubIdx->getZExtValue();
 919       SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
 920     }
 921     if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
 922       return true;
 923     break;
 924   }
 925   case ISD::CONCAT_VECTORS: {
 926     Known.Zero.setAllBits();
 927     Known.One.setAllBits();
 928     EVT SubVT = Op.getOperand(0).getValueType();
 929     unsigned NumSubVecs = Op.getNumOperands();
 930     unsigned NumSubElts = SubVT.getVectorNumElements();
 931     for (unsigned i = 0; i != NumSubVecs; ++i) {
 932       APInt DemandedSubElts =
 933           DemandedElts.extractBits(NumSubElts, i * NumSubElts);
 934       if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
 935                                Known2, TLO, Depth + 1))
 936         return true;
 937       // Known bits are shared by every demanded subvector element.
 938       if (!!DemandedSubElts) {
 939         Known.One &= Known2.One;
 940         Known.Zero &= Known2.Zero;
 941       }
 942     }
 943     break;
 944   }
 945   case ISD::VECTOR_SHUFFLE: {
 946     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
 947
 948     // Collect demanded elements from shuffle operands.
 949     APInt DemandedLHS(NumElts, 0);
 950     APInt DemandedRHS(NumElts, 0);
 951     for (unsigned i = 0; i != NumElts; ++i) {
 952       if (!DemandedElts[i])
 953         continue;
 954       int M = ShuffleMask[i];
 955       if (M < 0) {
 956         // For UNDEF elements, we don't know anything about the common state of
 957         // the shuffle result.
 958         DemandedLHS.clearAllBits();
 959         DemandedRHS.clearAllBits();
 960         break;
 961       }
 962       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
 963       if (M < (int)NumElts)
 964         DemandedLHS.setBit(M);
 965       else
 966         DemandedRHS.setBit(M - NumElts);
 967     }
 968
 969     if (!!DemandedLHS || !!DemandedRHS) {
 970       SDValue Op0 = Op.getOperand(0);
 971       SDValue Op1 = Op.getOperand(1);
 972
 973       Known.Zero.setAllBits();
 974       Known.One.setAllBits();
 975       if (!!DemandedLHS) {
 976         if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
 977                                  Depth + 1))
 978           return true;
 979         Known.One &= Known2.One;
 980         Known.Zero &= Known2.Zero;
 981       }
 982       if (!!DemandedRHS) {
 983         if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
 984                                  Depth + 1))
 985           return true;
 986         Known.One &= Known2.One;
 987         Known.Zero &= Known2.Zero;
 988       }
 989
 990       // Attempt to avoid multi-use ops if we don't need anything from them.
 991       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
 992           Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
 993       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
 994           Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
 995       if (DemandedOp0 || DemandedOp1) {
 996         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
 997         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
 998         SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
 999         return TLO.CombineTo(Op, NewOp);
1000       }
1001     }
1002     break;
1003   }
1004   case ISD::AND: {
1005     SDValue Op0 = Op.getOperand(0);
1006     SDValue Op1 = Op.getOperand(1);
1007
1008     // If the RHS is a constant, check to see if the LHS would be zero without
1009     // using the bits from the RHS.  Below, we use knowledge about the RHS to
1010     // simplify the LHS, here we're using information from the LHS to simplify
1011     // the RHS.
1012     if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1013       // Do not increment Depth here; that can cause an infinite loop.
1014       KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1015       // If the LHS already has zeros where RHSC does, this 'and' is dead.
1016       if ((LHSKnown.Zero & DemandedBits) ==
1017           (~RHSC->getAPIntValue() & DemandedBits))
1018         return TLO.CombineTo(Op, Op0);
1019
1020       // If any of the set bits in the RHS are known zero on the LHS, shrink
1021       // the constant.
1022       if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO))
1023         return true;
1024
1025       // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1026       // constant, but if this 'and' is only clearing bits that were just set by
1027       // the xor, then this 'and' can be eliminated by shrinking the mask of
1028       // the xor. For example, for a 32-bit X:
1029       // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1030       if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1031           LHSKnown.One == ~RHSC->getAPIntValue()) {
1032         SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1033         return TLO.CombineTo(Op, Xor);
1034       }
1035     }
1036
1037     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1038                              Depth + 1))
1039       return true;
1040     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1041     if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
1042                              Known2, TLO, Depth + 1))
1043       return true;
1044     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1045
1046     // Attempt to avoid multi-use ops if we don't need anything from them.
1047     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1048       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1049           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1050       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1051           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1052       if (DemandedOp0 || DemandedOp1) {
1053         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1054         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1055         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1056         return TLO.CombineTo(Op, NewOp);
1057       }
1058     }
1059
1060     // If all of the demanded bits are known one on one side, return the other.
1061     // These bits cannot contribute to the result of the 'and'.
1062     if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
1063       return TLO.CombineTo(Op, Op0);
1064     if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
1065       return TLO.CombineTo(Op, Op1);
1066     // If all of the demanded bits in the inputs are known zeros, return zero.
1067     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1068       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
1069     // If the RHS is a constant, see if we can simplify it.
1070     if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO))
1071       return true;
1072     // If the operation can be done in a smaller type, do so.
1073     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1074       return true;
1075
1076     // Output known-1 bits are only known if set in both the LHS & RHS.
1077     Known.One &= Known2.One;
1078     // Output known-0 are known to be clear if zero in either the LHS | RHS.
1079     Known.Zero |= Known2.Zero;
1080     break;
1081   }
1082   case ISD::OR: {
1083     SDValue Op0 = Op.getOperand(0);
1084     SDValue Op1 = Op.getOperand(1);
1085
1086     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1087                              Depth + 1))
1088       return true;
1089     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1090     if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1091                              Known2, TLO, Depth + 1))
1092       return true;
1093     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1094
1095     // Attempt to avoid multi-use ops if we don't need anything from them.
1096     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1097       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1098           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1099       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1100           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1101       if (DemandedOp0 || DemandedOp1) {
1102         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1103         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1104         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1105         return TLO.CombineTo(Op, NewOp);
1106       }
1107     }
1108
1109     // If all of the demanded bits are known zero on one side, return the other.
1110     // These bits cannot contribute to the result of the 'or'.
1111     if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1112       return TLO.CombineTo(Op, Op0);
1113     if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1114       return TLO.CombineTo(Op, Op1);
1115     // If the RHS is a constant, see if we can simplify it.
1116     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1117       return true;
1118     // If the operation can be done in a smaller type, do so.
1119     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1120       return true;
1121
1122     // Output known-0 bits are only known if clear in both the LHS & RHS.
1123     Known.Zero &= Known2.Zero;
1124     // Output known-1 are known to be set if set in either the LHS | RHS.
1125     Known.One |= Known2.One;
1126     break;
1127   }
1128   case ISD::XOR: {
1129     SDValue Op0 = Op.getOperand(0);
1130     SDValue Op1 = Op.getOperand(1);
1131
1132     if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1133                              Depth + 1))
1134       return true;
1135     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1136     if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1137                              Depth + 1))
1138       return true;
1139     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1140
1141     // Attempt to avoid multi-use ops if we don't need anything from them.
1142     if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1143       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1144           Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1145       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1146           Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1147       if (DemandedOp0 || DemandedOp1) {
1148         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1149         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1150         SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1151         return TLO.CombineTo(Op, NewOp);
1152       }
1153     }
1154
1155     // If all of the demanded bits are known zero on one side, return the other.
1156     // These bits cannot contribute to the result of the 'xor'.
1157     if (DemandedBits.isSubsetOf(Known.Zero))
1158       return TLO.CombineTo(Op, Op0);
1159     if (DemandedBits.isSubsetOf(Known2.Zero))
1160       return TLO.CombineTo(Op, Op1);
1161     // If the operation can be done in a smaller type, do so.
1162     if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1163       return true;
1164
1165     // If all of the unknown bits are known to be zero on one side or the other
1166     // (but not both) turn this into an *inclusive* or.
1167     //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1168     if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1169       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1170
1171     // Output known-0 bits are known if clear or set in both the LHS & RHS.
1172     KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
1173     // Output known-1 are known to be set if set in only one of the LHS, RHS.
1174     KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
1175
1176     if (ConstantSDNode *C = isConstOrConstSplat(Op1)) {
1177       // If one side is a constant, and all of the known set bits on the other
1178       // side are also set in the constant, turn this into an AND, as we know
1179       // the bits will be cleared.
1180       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1181       // NB: it is okay if more bits are known than are requested
1182       if (C->getAPIntValue() == Known2.One) {
1183         SDValue ANDC =
1184             TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1185         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1186       }
1187
1188       // If the RHS is a constant, see if we can change it. Don't alter a -1
1189       // constant because that's a 'not' op, and that is better for combining
1190       // and codegen.
1191       if (!C->isAllOnesValue()) {
1192         if (DemandedBits.isSubsetOf(C->getAPIntValue())) {
1193           // We're flipping all demanded bits. Flip the undemanded bits too.
1194           SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1195           return TLO.CombineTo(Op, New);
1196         }
1197         // If we can't turn this into a 'not', try to shrink the constant.
1198         if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1199           return true;
1200       }
1201     }
1202
1203     Known = std::move(KnownOut);
1204     break;
1205   }
1206   case ISD::SELECT:
1207     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1208                              Depth + 1))
1209       return true;
1210     if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1211                              Depth + 1))
1212       return true;
1213     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1214     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1215
1216     // If the operands are constants, see if we can simplify them.
1217     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1218       return true;
1219
1220     // Only known if known in both the LHS and RHS.
1221     Known.One &= Known2.One;
1222     Known.Zero &= Known2.Zero;
1223     break;
1224   case ISD::SELECT_CC:
1225     if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1226                              Depth + 1))
1227       return true;
1228     if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1229                              Depth + 1))
1230       return true;
1231     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1232     assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1233
1234     // If the operands are constants, see if we can simplify them.
1235     if (ShrinkDemandedConstant(Op, DemandedBits, TLO))
1236       return true;
1237
1238     // Only known if known in both the LHS and RHS.
1239     Known.One &= Known2.One;
1240     Known.Zero &= Known2.Zero;
1241     break;
1242   case ISD::SETCC: {
1243     SDValue Op0 = Op.getOperand(0);
1244     SDValue Op1 = Op.getOperand(1);
1245     ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1246     // If (1) we only need the sign-bit, (2) the setcc operands are the same
1247     // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1248     // -1, we may be able to bypass the setcc.
1249     if (DemandedBits.isSignMask() &&
1250         Op0.getScalarValueSizeInBits() == BitWidth &&
1251         getBooleanContents(VT) ==
1252             BooleanContent::ZeroOrNegativeOneBooleanContent) {
1253       // If we're testing X < 0, then this compare isn't needed - just use X!
1254       // FIXME: We're limiting to integer types here, but this should also work
1255       // if we don't care about FP signed-zero. The use of SETLT with FP means
1256       // that we don't care about NaNs.
1257       if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1258           (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1259         return TLO.CombineTo(Op, Op0);
1260
1261       // TODO: Should we check for other forms of sign-bit comparisons?
1262       // Examples: X <= -1, X >= 0
1263     }
1264     if (getBooleanContents(Op0.getValueType()) ==
1265             TargetLowering::ZeroOrOneBooleanContent &&
1266         BitWidth > 1)
1267       Known.Zero.setBitsFrom(1);
1268     break;
1269   }
1270   case ISD::SHL: {
1271     SDValue Op0 = Op.getOperand(0);
1272     SDValue Op1 = Op.getOperand(1);
1273
1274     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1275       // If the shift count is an invalid immediate, don't do anything.
1276       if (SA->getAPIntValue().uge(BitWidth))
1277         break;
1278
1279       unsigned ShAmt = SA->getZExtValue();
1280       if (ShAmt == 0)
1281         return TLO.CombineTo(Op, Op0);
1282
1283       // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1284       // single shift.  We can do this if the bottom bits (which are shifted
1285       // out) are never demanded.
1286       // TODO - support non-uniform vector amounts.
1287       if (Op0.getOpcode() == ISD::SRL) {
1288         if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
1289           if (ConstantSDNode *SA2 =
1290                   isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1291             if (SA2->getAPIntValue().ult(BitWidth)) {
1292               unsigned C1 = SA2->getZExtValue();
1293               unsigned Opc = ISD::SHL;
1294               int Diff = ShAmt - C1;
1295               if (Diff < 0) {
1296                 Diff = -Diff;
1297                 Opc = ISD::SRL;
1298               }
1299
1300               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
1301               return TLO.CombineTo(
1302                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1303             }
1304           }
1305         }
1306       }
1307
1308       if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
1309                                Known, TLO, Depth + 1))
1310         return true;
1311
1312       // Try shrinking the operation as long as the shift amount will still be
1313       // in range.
1314       if ((ShAmt < DemandedBits.getActiveBits()) &&
1315           ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1316         return true;
1317
1318       // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1319       // are not demanded. This will likely allow the anyext to be folded away.
1320       if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1321         SDValue InnerOp = Op0.getOperand(0);
1322         EVT InnerVT = InnerOp.getValueType();
1323         unsigned InnerBits = InnerVT.getScalarSizeInBits();
1324         if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1325             isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1326           EVT ShTy = getShiftAmountTy(InnerVT, DL);
1327           if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
1328             ShTy = InnerVT;
1329           SDValue NarrowShl =
1330               TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
1331                               TLO.DAG.getConstant(ShAmt, dl, ShTy));
1332           return TLO.CombineTo(
1333               Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1334         }
1335         // Repeat the SHL optimization above in cases where an extension
1336         // intervenes: (shl (anyext (shr x, c1)), c2) to
1337         // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
1338         // aren't demanded (as above) and that the shifted upper c1 bits of
1339         // x aren't demanded.
1340         if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
1341             InnerOp.hasOneUse()) {
1342           if (ConstantSDNode *SA2 =
1343                   isConstOrConstSplat(InnerOp.getOperand(1))) {
1344             unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
1345             if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1346                 DemandedBits.getActiveBits() <=
1347                     (InnerBits - InnerShAmt + ShAmt) &&
1348                 DemandedBits.countTrailingZeros() >= ShAmt) {
1349               SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
1350                                                   Op1.getValueType());
1351               SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1352                                                InnerOp.getOperand(0));
1353               return TLO.CombineTo(
1354                   Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1355             }
1356           }
1357         }
1358       }
1359
1360       Known.Zero <<= ShAmt;
1361       Known.One <<= ShAmt;
1362       // low bits known zero.
1363       Known.Zero.setLowBits(ShAmt);
1364     }
1365     break;
1366   }
1367   case ISD::SRL: {
1368     SDValue Op0 = Op.getOperand(0);
1369     SDValue Op1 = Op.getOperand(1);
1370
1371     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1372       // If the shift count is an invalid immediate, don't do anything.
1373       if (SA->getAPIntValue().uge(BitWidth))
1374         break;
1375
1376       unsigned ShAmt = SA->getZExtValue();
1377       if (ShAmt == 0)
1378         return TLO.CombineTo(Op, Op0);
1379
1380       EVT ShiftVT = Op1.getValueType();
1381       APInt InDemandedMask = (DemandedBits << ShAmt);
1382
1383       // If the shift is exact, then it does demand the low bits (and knows that
1384       // they are zero).
1385       if (Op->getFlags().hasExact())
1386         InDemandedMask.setLowBits(ShAmt);
1387
1388       // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1389       // single shift.  We can do this if the top bits (which are shifted out)
1390       // are never demanded.
1391       // TODO - support non-uniform vector amounts.
1392       if (Op0.getOpcode() == ISD::SHL) {
1393         if (ConstantSDNode *SA2 =
1394                 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1395           if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
1396             if (SA2->getAPIntValue().ult(BitWidth)) {
1397               unsigned C1 = SA2->getZExtValue();
1398               unsigned Opc = ISD::SRL;
1399               int Diff = ShAmt - C1;
1400               if (Diff < 0) {
1401                 Diff = -Diff;
1402                 Opc = ISD::SHL;
1403               }
1404
1405               SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1406               return TLO.CombineTo(
1407                   Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1408             }
1409           }
1410         }
1411       }
1412
1413       // Compute the new bits that are at the top now.
1414       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1415                                Depth + 1))
1416         return true;
1417       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1418       Known.Zero.lshrInPlace(ShAmt);
1419       Known.One.lshrInPlace(ShAmt);
1420
1421       Known.Zero.setHighBits(ShAmt); // High bits known zero.
1422     }
1423     break;
1424   }
1425   case ISD::SRA: {
1426     SDValue Op0 = Op.getOperand(0);
1427     SDValue Op1 = Op.getOperand(1);
1428
1429     // If this is an arithmetic shift right and only the low bit is demanded, we
1430     // can always convert this into a logical shr, even if the shift amount is
1431     // variable.  The low bit of the shift cannot be an input sign bit unless
1432     // the shift amount is >= the size of the datatype, which is undefined.
1433     if (DemandedBits.isOneValue())
1434       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1435
1436     if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
1437       // If the shift count is an invalid immediate, don't do anything.
1438       if (SA->getAPIntValue().uge(BitWidth))
1439         break;
1440
1441       unsigned ShAmt = SA->getZExtValue();
1442       if (ShAmt == 0)
1443         return TLO.CombineTo(Op, Op0);
1444
1445       APInt InDemandedMask = (DemandedBits << ShAmt);
1446
1447       // If the shift is exact, then it does demand the low bits (and knows that
1448       // they are zero).
1449       if (Op->getFlags().hasExact())
1450         InDemandedMask.setLowBits(ShAmt);
1451
1452       // If any of the demanded bits are produced by the sign extension, we also
1453       // demand the input sign bit.
1454       if (DemandedBits.countLeadingZeros() < ShAmt)
1455         InDemandedMask.setSignBit();
1456
1457       if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1458                                Depth + 1))
1459         return true;
1460       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1461       Known.Zero.lshrInPlace(ShAmt);
1462       Known.One.lshrInPlace(ShAmt);
1463
1464       // If the input sign bit is known to be zero, or if none of the top bits
1465       // are demanded, turn this into an unsigned shift right.
1466       if (Known.Zero[BitWidth - ShAmt - 1] ||
1467           DemandedBits.countLeadingZeros() >= ShAmt) {
1468         SDNodeFlags Flags;
1469         Flags.setExact(Op->getFlags().hasExact());
1470         return TLO.CombineTo(
1471             Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1472       }
1473
1474       int Log2 = DemandedBits.exactLogBase2();
1475       if (Log2 >= 0) {
1476         // The bit must come from the sign.
1477         SDValue NewSA =
1478             TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType());
1479         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1480       }
1481
1482       if (Known.One[BitWidth - ShAmt - 1])
1483         // New bits are known one.
1484         Known.One.setHighBits(ShAmt);
1485     }
1486     break;
1487   }
1488   case ISD::FSHL:
1489   case ISD::FSHR: {
1490     SDValue Op0 = Op.getOperand(0);
1491     SDValue Op1 = Op.getOperand(1);
1492     SDValue Op2 = Op.getOperand(2);
1493     bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1494
1495     if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1496       unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1497
1498       // For fshl, 0-shift returns the 1st arg.
1499       // For fshr, 0-shift returns the 2nd arg.
1500       if (Amt == 0) {
1501         if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1502                                  Known, TLO, Depth + 1))
1503           return true;
1504         break;
1505       }
1506
1507       // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1508       // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1509       APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1510       APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1511       if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1512                                Depth + 1))
1513         return true;
1514       if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1515                                Depth + 1))
1516         return true;
1517
1518       Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
1519       Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
1520       Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1521       Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
1522       Known.One |= Known2.One;
1523       Known.Zero |= Known2.Zero;
1524     }
1525     break;
1526   }
1527   case ISD::BITREVERSE: {
1528     SDValue Src = Op.getOperand(0);
1529     APInt DemandedSrcBits = DemandedBits.reverseBits();
1530     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
1531                              Depth + 1))
1532       return true;
1533     Known.One = Known2.One.reverseBits();
1534     Known.Zero = Known2.Zero.reverseBits();
1535     break;
1536   }
1537   case ISD::SIGN_EXTEND_INREG: {
1538     SDValue Op0 = Op.getOperand(0);
1539     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1540     unsigned ExVTBits = ExVT.getScalarSizeInBits();
1541
1542     // If we only care about the highest bit, don't bother shifting right.
1543     if (DemandedBits.isSignMask()) {
1544       unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
1545       bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
1546       // However if the input is already sign extended we expect the sign
1547       // extension to be dropped altogether later and do not simplify.
1548       if (!AlreadySignExtended) {
1549         // Compute the correct shift amount type, which must be getShiftAmountTy
1550         // for scalar types after legalization.
1551         EVT ShiftAmtTy = VT;
1552         if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1553           ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1554
1555         SDValue ShiftAmt =
1556             TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
1557         return TLO.CombineTo(Op,
1558                              TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
1559       }
1560     }
1561
1562     // If none of the extended bits are demanded, eliminate the sextinreg.
1563     if (DemandedBits.getActiveBits() <= ExVTBits)
1564       return TLO.CombineTo(Op, Op0);
1565
1566     APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
1567
1568     // Since the sign extended bits are demanded, we know that the sign
1569     // bit is demanded.
1570     InputDemandedBits.setBit(ExVTBits - 1);
1571
1572     if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
1573       return true;
1574     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1575
1576     // If the sign bit of the input is known set or clear, then we know the
1577     // top bits of the result.
1578
1579     // If the input sign bit is known zero, convert this into a zero extension.
1580     if (Known.Zero[ExVTBits - 1])
1581       return TLO.CombineTo(
1582           Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType()));
1583
1584     APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1585     if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1586       Known.One.setBitsFrom(ExVTBits);
1587       Known.Zero &= Mask;
1588     } else { // Input sign bit unknown
1589       Known.Zero &= Mask;
1590       Known.One &= Mask;
1591     }
1592     break;
1593   }
1594   case ISD::BUILD_PAIR: {
1595     EVT HalfVT = Op.getOperand(0).getValueType();
1596     unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
1597
1598     APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
1599     APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
1600
1601     KnownBits KnownLo, KnownHi;
1602
1603     if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
1604       return true;
1605
1606     if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
1607       return true;
1608
1609     Known.Zero = KnownLo.Zero.zext(BitWidth) |
1610                  KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
1611
1612     Known.One = KnownLo.One.zext(BitWidth) |
1613                 KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
1614     break;
1615   }
1616   case ISD::ZERO_EXTEND:
1617   case ISD::ZERO_EXTEND_VECTOR_INREG: {
1618     SDValue Src = Op.getOperand(0);
1619     EVT SrcVT = Src.getValueType();
1620     unsigned InBits = SrcVT.getScalarSizeInBits();
1621     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1622     bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
1623
1624     // If none of the top bits are demanded, convert this into an any_extend.
1625     if (DemandedBits.getActiveBits() <= InBits) {
1626       // If we only need the non-extended bits of the bottom element
1627       // then we can just bitcast to the result.
1628       if (IsVecInReg && DemandedElts == 1 &&
1629           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1630           TLO.DAG.getDataLayout().isLittleEndian())
1631         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1632
1633       unsigned Opc =
1634           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1635       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1636         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1637     }
1638
1639     APInt InDemandedBits = DemandedBits.trunc(InBits);
1640     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1641     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1642                              Depth + 1))
1643       return true;
1644     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1645     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1646     Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
1647     break;
1648   }
1649   case ISD::SIGN_EXTEND:
1650   case ISD::SIGN_EXTEND_VECTOR_INREG: {
1651     SDValue Src = Op.getOperand(0);
1652     EVT SrcVT = Src.getValueType();
1653     unsigned InBits = SrcVT.getScalarSizeInBits();
1654     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1655     bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
1656
1657     // If none of the top bits are demanded, convert this into an any_extend.
1658     if (DemandedBits.getActiveBits() <= InBits) {
1659       // If we only need the non-extended bits of the bottom element
1660       // then we can just bitcast to the result.
1661       if (IsVecInReg && DemandedElts == 1 &&
1662           VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1663           TLO.DAG.getDataLayout().isLittleEndian())
1664         return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1665
1666       unsigned Opc =
1667           IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
1668       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1669         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1670     }
1671
1672     APInt InDemandedBits = DemandedBits.trunc(InBits);
1673     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1674
1675     // Since some of the sign extended bits are demanded, we know that the sign
1676     // bit is demanded.
1677     InDemandedBits.setBit(InBits - 1);
1678
1679     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1680                              Depth + 1))
1681       return true;
1682     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1683     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1684
1685     // If the sign bit is known one, the top bits match.
1686     Known = Known.sext(BitWidth);
1687
1688     // If the sign bit is known zero, convert this to a zero extend.
1689     if (Known.isNonNegative()) {
1690       unsigned Opc =
1691           IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
1692       if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
1693         return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
1694     }
1695     break;
1696   }
1697   case ISD::ANY_EXTEND:
1698   case ISD::ANY_EXTEND_VECTOR_INREG: {
1699     SDValue Src = Op.getOperand(0);
1700     EVT SrcVT = Src.getValueType();
1701     unsigned InBits = SrcVT.getScalarSizeInBits();
1702     unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1703     bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
1704
1705     // If we only need the bottom element then we can just bitcast.
1706     // TODO: Handle ANY_EXTEND?
1707     if (IsVecInReg && DemandedElts == 1 &&
1708         VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1709         TLO.DAG.getDataLayout().isLittleEndian())
1710       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
1711
1712     APInt InDemandedBits = DemandedBits.trunc(InBits);
1713     APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
1714     if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
1715                              Depth + 1))
1716       return true;
1717     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1718     assert(Known.getBitWidth() == InBits && "Src width has changed?");
1719     Known = Known.zext(BitWidth, false /* => any extend */);
1720     break;
1721   }
1722   case ISD::TRUNCATE: {
1723     SDValue Src = Op.getOperand(0);
1724
1725     // Simplify the input, using demanded bit information, and compute the known
1726     // zero/one bits live out.
1727     unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
1728     APInt TruncMask = DemandedBits.zext(OperandBitWidth);
1729     if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1))
1730       return true;
1731     Known = Known.trunc(BitWidth);
1732
1733     // Attempt to avoid multi-use ops if we don't need anything from them.
1734     if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1735             Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
1736       return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
1737
1738     // If the input is only used by this truncate, see if we can shrink it based
1739     // on the known demanded bits.
1740     if (Src.getNode()->hasOneUse()) {
1741       switch (Src.getOpcode()) {
1742       default:
1743         break;
1744       case ISD::SRL:
1745         // Shrink SRL by a constant if none of the high bits shifted in are
1746         // demanded.
1747         if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
1748           // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1749           // undesirable.
1750           break;
1751
1752         auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
1753         if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
1754           break;
1755
1756         SDValue Shift = Src.getOperand(1);
1757         uint64_t ShVal = ShAmt->getZExtValue();
1758
1759         if (TLO.LegalTypes())
1760           Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
1761
1762         APInt HighBits =
1763             APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
1764         HighBits.lshrInPlace(ShVal);
1765         HighBits = HighBits.trunc(BitWidth);
1766
1767         if (!(HighBits & DemandedBits)) {
1768           // None of the shifted in bits are needed.  Add a truncate of the
1769           // shift input, then shift it.
1770           SDValue NewTrunc =
1771               TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
1772           return TLO.CombineTo(
1773               Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
1774         }
1775         break;
1776       }
1777     }
1778
1779     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1780     break;
1781   }
1782   case ISD::AssertZext: {
1783     // AssertZext demands all of the high bits, plus any of the low bits
1784     // demanded by its users.
1785     EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1786     APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1787     if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
1788                              TLO, Depth + 1))
1789       return true;
1790     assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1791
1792     Known.Zero |= ~InMask;
1793     break;
1794   }
1795   case ISD::EXTRACT_VECTOR_ELT: {
1796     SDValue Src = Op.getOperand(0);
1797     SDValue Idx = Op.getOperand(1);
1798     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1799     unsigned EltBitWidth = Src.getScalarValueSizeInBits();
1800
1801     // Demand the bits from every vector element without a constant index.
1802     APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
1803     if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
1804       if (CIdx->getAPIntValue().ult(NumSrcElts))
1805         DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
1806
1807     // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1808     // anything about the extended bits.
1809     APInt DemandedSrcBits = DemandedBits;
1810     if (BitWidth > EltBitWidth)
1811       DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
1812
1813     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
1814                              Depth + 1))
1815       return true;
1816
1817     Known = Known2;
1818     if (BitWidth > EltBitWidth)
1819       Known = Known.zext(BitWidth, false /* => any extend */);
1820     break;
1821   }
1822   case ISD::BITCAST: {
1823     SDValue Src = Op.getOperand(0);
1824     EVT SrcVT = Src.getValueType();
1825     unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
1826
1827     // If this is an FP->Int bitcast and if the sign bit is the only
1828     // thing demanded, turn this into a FGETSIGN.
1829     if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
1830         DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
1831         SrcVT.isFloatingPoint()) {
1832       bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
1833       bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
1834       if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
1835           SrcVT != MVT::f128) {
1836         // Cannot eliminate/lower SHL for f128 yet.
1837         EVT Ty = OpVTLegal ? VT : MVT::i32;
1838         // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1839         // place.  We expect the SHL to be eliminated by other optimizations.
1840         SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
1841         unsigned OpVTSizeInBits = Op.getValueSizeInBits();
1842         if (!OpVTLegal && OpVTSizeInBits > 32)
1843           Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
1844         unsigned ShVal = Op.getValueSizeInBits() - 1;
1845         SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
1846         return TLO.CombineTo(Op,
1847                              TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
1848       }
1849     }
1850
1851     // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
1852     // Demand the elt/bit if any of the original elts/bits are demanded.
1853     // TODO - bigendian once we have test coverage.
1854     if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
1855         TLO.DAG.getDataLayout().isLittleEndian()) {
1856       unsigned Scale = BitWidth / NumSrcEltBits;
1857       unsigned NumSrcElts = SrcVT.getVectorNumElements();
1858       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1859       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1860       for (unsigned i = 0; i != Scale; ++i) {
1861         unsigned Offset = i * NumSrcEltBits;
1862         APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
1863         if (!Sub.isNullValue()) {
1864           DemandedSrcBits |= Sub;
1865           for (unsigned j = 0; j != NumElts; ++j)
1866             if (DemandedElts[j])
1867               DemandedSrcElts.setBit((j * Scale) + i);
1868         }
1869       }
1870
1871       APInt KnownSrcUndef, KnownSrcZero;
1872       if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1873                                      KnownSrcZero, TLO, Depth + 1))
1874         return true;
1875
1876       KnownBits KnownSrcBits;
1877       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1878                                KnownSrcBits, TLO, Depth + 1))
1879         return true;
1880     } else if ((NumSrcEltBits % BitWidth) == 0 &&
1881                TLO.DAG.getDataLayout().isLittleEndian()) {
1882       unsigned Scale = NumSrcEltBits / BitWidth;
1883       unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
1884       APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
1885       APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
1886       for (unsigned i = 0; i != NumElts; ++i)
1887         if (DemandedElts[i]) {
1888           unsigned Offset = (i % Scale) * BitWidth;
1889           DemandedSrcBits.insertBits(DemandedBits, Offset);
1890           DemandedSrcElts.setBit(i / Scale);
1891         }
1892
1893       if (SrcVT.isVector()) {
1894         APInt KnownSrcUndef, KnownSrcZero;
1895         if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
1896                                        KnownSrcZero, TLO, Depth + 1))
1897           return true;
1898       }
1899
1900       KnownBits KnownSrcBits;
1901       if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
1902                                KnownSrcBits, TLO, Depth + 1))
1903         return true;
1904     }
1905
1906     // If this is a bitcast, let computeKnownBits handle it.  Only do this on a
1907     // recursive call where Known may be useful to the caller.
1908     if (Depth > 0) {
1909       Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1910       return false;
1911     }
1912     break;
1913   }
1914   case ISD::ADD:
1915   case ISD::MUL:
1916   case ISD::SUB: {
1917     // Add, Sub, and Mul don't demand any bits in positions beyond that
1918     // of the highest bit demanded of them.
1919     SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
1920     SDNodeFlags Flags = Op.getNode()->getFlags();
1921     unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
1922     APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
1923     if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
1924                              Depth + 1) ||
1925         SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
1926                              Depth + 1) ||
1927         // See if the operation should be performed at a smaller bit width.
1928         ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
1929       if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1930         // Disable the nsw and nuw flags. We can no longer guarantee that we
1931         // won't wrap after simplification.
1932         Flags.setNoSignedWrap(false);
1933         Flags.setNoUnsignedWrap(false);
1934         SDValue NewOp =
1935             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1936         return TLO.CombineTo(Op, NewOp);
1937       }
1938       return true;
1939     }
1940
1941     // Attempt to avoid multi-use ops if we don't need anything from them.
1942     if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
1943       SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1944           Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1945       SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1946           Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
1947       if (DemandedOp0 || DemandedOp1) {
1948         Flags.setNoSignedWrap(false);
1949         Flags.setNoUnsignedWrap(false);
1950         Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1951         Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1952         SDValue NewOp =
1953             TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
1954         return TLO.CombineTo(Op, NewOp);
1955       }
1956     }
1957
1958     // If we have a constant operand, we may be able to turn it into -1 if we
1959     // do not demand the high bits. This can make the constant smaller to
1960     // encode, allow more general folding, or match specialized instruction
1961     // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1962     // is probably not useful (and could be detrimental).
1963     ConstantSDNode *C = isConstOrConstSplat(Op1);
1964     APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
1965     if (C && !C->isAllOnesValue() && !C->isOne() &&
1966         (C->getAPIntValue() | HighMask).isAllOnesValue()) {
1967       SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
1968       // Disable the nsw and nuw flags. We can no longer guarantee that we
1969       // won't wrap after simplification.
1970       Flags.setNoSignedWrap(false);
1971       Flags.setNoUnsignedWrap(false);
1972       SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
1973       return TLO.CombineTo(Op, NewOp);
1974     }
1975
1976     LLVM_FALLTHROUGH;
1977   }
1978   default:
1979     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
1980       if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
1981                                             Known, TLO, Depth))
1982         return true;
1983       break;
1984     }
1985
1986     // Just use computeKnownBits to compute output bits.
1987     Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1988     break;
1989   }
1990
1991   // If we know the value of all of the demanded bits, return this as a
1992   // constant.
1993   if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
1994     // Avoid folding to a constant if any OpaqueConstant is involved.
1995     const SDNode *N = Op.getNode();
1996     for (SDNodeIterator I = SDNodeIterator::begin(N),
1997                         E = SDNodeIterator::end(N);
1998          I != E; ++I) {
1999       SDNode *Op = *I;
2000       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2001         if (C->isOpaque())
2002           return false;
2003     }
2004     // TODO: Handle float bits as well.
2005     if (VT.isInteger())
2006       return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2007   }
2008
2009   return false;
2010 }
2011
2012 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2013                                                 const APInt &DemandedElts,
2014                                                 APInt &KnownUndef,
2015                                                 APInt &KnownZero,
2016                                                 DAGCombinerInfo &DCI) const {
2017   SelectionDAG &DAG = DCI.DAG;
2018   TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2019                         !DCI.isBeforeLegalizeOps());
2020
2021   bool Simplified =
2022       SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2023   if (Simplified) {
2024     DCI.AddToWorklist(Op.getNode());
2025     DCI.CommitTargetLoweringOpt(TLO);
2026   }
2027
2028   return Simplified;
2029 }
2030
2031 /// Given a vector binary operation and known undefined elements for each input
2032 /// operand, compute whether each element of the output is undefined.
2033 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2034                                          const APInt &UndefOp0,
2035                                          const APInt &UndefOp1) {
2036   EVT VT = BO.getValueType();
2037   assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2038          "Vector binop only");
2039
2040   EVT EltVT = VT.getVectorElementType();
2041   unsigned NumElts = VT.getVectorNumElements();
2042   assert(UndefOp0.getBitWidth() == NumElts &&
2043          UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2044
2045   auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2046                                    const APInt &UndefVals) {
2047     if (UndefVals[Index])
2048       return DAG.getUNDEF(EltVT);
2049
2050     if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2051       // Try hard to make sure that the getNode() call is not creating temporary
2052       // nodes. Ignore opaque integers because they do not constant fold.
2053       SDValue Elt = BV->getOperand(Index);
2054       auto *C = dyn_cast<ConstantSDNode>(Elt);
2055       if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2056         return Elt;
2057     }
2058
2059     return SDValue();
2060   };
2061
2062   APInt KnownUndef = APInt::getNullValue(NumElts);
2063   for (unsigned i = 0; i != NumElts; ++i) {
2064     // If both inputs for this element are either constant or undef and match
2065     // the element type, compute the constant/undef result for this element of
2066     // the vector.
2067     // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2068     // not handle FP constants. The code within getNode() should be refactored
2069     // to avoid the danger of creating a bogus temporary node here.
2070     SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2071     SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2072     if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2073       if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2074         KnownUndef.setBit(i);
2075   }
2076   return KnownUndef;
2077 }
2078
2079 bool TargetLowering::SimplifyDemandedVectorElts(
2080     SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2081     APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2082     bool AssumeSingleUse) const {
2083   EVT VT = Op.getValueType();
2084   APInt DemandedElts = OriginalDemandedElts;
2085   unsigned NumElts = DemandedElts.getBitWidth();
2086   assert(VT.isVector() && "Expected vector op");
2087   assert(VT.getVectorNumElements() == NumElts &&
2088          "Mask size mismatches value type element count!");
2089
2090   KnownUndef = KnownZero = APInt::getNullValue(NumElts);
2091
2092   // Undef operand.
2093   if (Op.isUndef()) {
2094     KnownUndef.setAllBits();
2095     return false;
2096   }
2097
2098   // If Op has other users, assume that all elements are needed.
2099   if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
2100     DemandedElts.setAllBits();
2101
2102   // Not demanding any elements from Op.
2103   if (DemandedElts == 0) {
2104     KnownUndef.setAllBits();
2105     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2106   }
2107
2108   // Limit search depth.
2109   if (Depth >= 6)
2110     return false;
2111
2112   SDLoc DL(Op);
2113   unsigned EltSizeInBits = VT.getScalarSizeInBits();
2114
2115   switch (Op.getOpcode()) {
2116   case ISD::SCALAR_TO_VECTOR: {
2117     if (!DemandedElts[0]) {
2118       KnownUndef.setAllBits();
2119       return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2120     }
2121     KnownUndef.setHighBits(NumElts - 1);
2122     break;
2123   }
2124   case ISD::BITCAST: {
2125     SDValue Src = Op.getOperand(0);
2126     EVT SrcVT = Src.getValueType();
2127
2128     // We only handle vectors here.
2129     // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2130     if (!SrcVT.isVector())
2131       break;
2132
2133     // Fast handling of 'identity' bitcasts.
2134     unsigned NumSrcElts = SrcVT.getVectorNumElements();
2135     if (NumSrcElts == NumElts)
2136       return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2137                                         KnownZero, TLO, Depth + 1);
2138
2139     APInt SrcZero, SrcUndef;
2140     APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
2141
2142     // Bitcast from 'large element' src vector to 'small element' vector, we
2143     // must demand a source element if any DemandedElt maps to it.
2144     if ((NumElts % NumSrcElts) == 0) {
2145       unsigned Scale = NumElts / NumSrcElts;
2146       for (unsigned i = 0; i != NumElts; ++i)
2147         if (DemandedElts[i])
2148           SrcDemandedElts.setBit(i / Scale);
2149
2150       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2151                                      TLO, Depth + 1))
2152         return true;
2153
2154       // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2155       // of the large element.
2156       // TODO - bigendian once we have test coverage.
2157       if (TLO.DAG.getDataLayout().isLittleEndian()) {
2158         unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2159         APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
2160         for (unsigned i = 0; i != NumElts; ++i)
2161           if (DemandedElts[i]) {
2162             unsigned Ofs = (i % Scale) * EltSizeInBits;
2163             SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2164           }
2165
2166         KnownBits Known;
2167         if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1))
2168           return true;
2169       }
2170
2171       // If the src element is zero/undef then all the output elements will be -
2172       // only demanded elements are guaranteed to be correct.
2173       for (unsigned i = 0; i != NumSrcElts; ++i) {
2174         if (SrcDemandedElts[i]) {
2175           if (SrcZero[i])
2176             KnownZero.setBits(i * Scale, (i + 1) * Scale);
2177           if (SrcUndef[i])
2178             KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2179         }
2180       }
2181     }
2182
2183     // Bitcast from 'small element' src vector to 'large element' vector, we
2184     // demand all smaller source elements covered by the larger demanded element
2185     // of this vector.
2186     if ((NumSrcElts % NumElts) == 0) {
2187       unsigned Scale = NumSrcElts / NumElts;
2188       for (unsigned i = 0; i != NumElts; ++i)
2189         if (DemandedElts[i])
2190           SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
2191
2192       if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2193                                      TLO, Depth + 1))
2194         return true;
2195
2196       // If all the src elements covering an output element are zero/undef, then
2197       // the output element will be as well, assuming it was demanded.
2198       for (unsigned i = 0; i != NumElts; ++i) {
2199         if (DemandedElts[i]) {
2200           if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
2201             KnownZero.setBit(i);
2202           if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
2203             KnownUndef.setBit(i);
2204         }
2205       }
2206     }
2207     break;
2208   }
2209   case ISD::BUILD_VECTOR: {
2210     // Check all elements and simplify any unused elements with UNDEF.
2211     if (!DemandedElts.isAllOnesValue()) {
2212       // Don't simplify BROADCASTS.
2213       if (llvm::any_of(Op->op_values(),
2214                        [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
2215         SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
2216         bool Updated = false;
2217         for (unsigned i = 0; i != NumElts; ++i) {
2218           if (!DemandedElts[i] && !Ops[i].isUndef()) {
2219             Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
2220             KnownUndef.setBit(i);
2221             Updated = true;
2222           }
2223         }
2224         if (Updated)
2225           return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
2226       }
2227     }
2228     for (unsigned i = 0; i != NumElts; ++i) {
2229       SDValue SrcOp = Op.getOperand(i);
2230       if (SrcOp.isUndef()) {
2231         KnownUndef.setBit(i);
2232       } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
2233                  (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
2234         KnownZero.setBit(i);
2235       }
2236     }
2237     break;
2238   }
2239   case ISD::CONCAT_VECTORS: {
2240     EVT SubVT = Op.getOperand(0).getValueType();
2241     unsigned NumSubVecs = Op.getNumOperands();
2242     unsigned NumSubElts = SubVT.getVectorNumElements();
2243     for (unsigned i = 0; i != NumSubVecs; ++i) {
2244       SDValue SubOp = Op.getOperand(i);
2245       APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
2246       APInt SubUndef, SubZero;
2247       if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
2248                                      Depth + 1))
2249         return true;
2250       KnownUndef.insertBits(SubUndef, i * NumSubElts);
2251       KnownZero.insertBits(SubZero, i * NumSubElts);
2252     }
2253     break;
2254   }
2255   case ISD::INSERT_SUBVECTOR: {
2256     if (!isa<ConstantSDNode>(Op.getOperand(2)))
2257       break;
2258     SDValue Base = Op.getOperand(0);
2259     SDValue Sub = Op.getOperand(1);
2260     EVT SubVT = Sub.getValueType();
2261     unsigned NumSubElts = SubVT.getVectorNumElements();
2262     const APInt &Idx = Op.getConstantOperandAPInt(2);
2263     if (Idx.ugt(NumElts - NumSubElts))
2264       break;
2265     unsigned SubIdx = Idx.getZExtValue();
2266     APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
2267     APInt SubUndef, SubZero;
2268     if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
2269                                    Depth + 1))
2270       return true;
2271     APInt BaseElts = DemandedElts;
2272     BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
2273
2274     // If none of the base operand elements are demanded, replace it with undef.
2275     if (!BaseElts && !Base.isUndef())
2276       return TLO.CombineTo(Op,
2277                            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
2278                                            TLO.DAG.getUNDEF(VT),
2279                                            Op.getOperand(1),
2280                                            Op.getOperand(2)));
2281
2282     if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
2283                                    Depth + 1))
2284       return true;
2285     KnownUndef.insertBits(SubUndef, SubIdx);
2286     KnownZero.insertBits(SubZero, SubIdx);
2287     break;
2288   }
2289   case ISD::EXTRACT_SUBVECTOR: {
2290     SDValue Src = Op.getOperand(0);
2291     ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2292     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2293     if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
2294       // Offset the demanded elts by the subvector index.
2295       uint64_t Idx = SubIdx->getZExtValue();
2296       APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
2297       APInt SrcUndef, SrcZero;
2298       if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
2299                                      Depth + 1))
2300         return true;
2301       KnownUndef = SrcUndef.extractBits(NumElts, Idx);
2302       KnownZero = SrcZero.extractBits(NumElts, Idx);
2303     }
2304     break;
2305   }
2306   case ISD::INSERT_VECTOR_ELT: {
2307     SDValue Vec = Op.getOperand(0);
2308     SDValue Scl = Op.getOperand(1);
2309     auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2310
2311     // For a legal, constant insertion index, if we don't need this insertion
2312     // then strip it, else remove it from the demanded elts.
2313     if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
2314       unsigned Idx = CIdx->getZExtValue();
2315       if (!DemandedElts[Idx])
2316         return TLO.CombineTo(Op, Vec);
2317
2318       APInt DemandedVecElts(DemandedElts);
2319       DemandedVecElts.clearBit(Idx);
2320       if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
2321                                      KnownZero, TLO, Depth + 1))
2322         return true;
2323
2324       KnownUndef.clearBit(Idx);
2325       if (Scl.isUndef())
2326         KnownUndef.setBit(Idx);
2327
2328       KnownZero.clearBit(Idx);
2329       if (isNullConstant(Scl) || isNullFPConstant(Scl))
2330         KnownZero.setBit(Idx);
2331       break;
2332     }
2333
2334     APInt VecUndef, VecZero;
2335     if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
2336                                    Depth + 1))
2337       return true;
2338     // Without knowing the insertion index we can't set KnownUndef/KnownZero.
2339     break;
2340   }
2341   case ISD::VSELECT: {
2342     // Try to transform the select condition based on the current demanded
2343     // elements.
2344     // TODO: If a condition element is undef, we can choose from one arm of the
2345     //       select (and if one arm is undef, then we can propagate that to the
2346     //       result).
2347     // TODO - add support for constant vselect masks (see IR version of this).
2348     APInt UnusedUndef, UnusedZero;
2349     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
2350                                    UnusedZero, TLO, Depth + 1))
2351       return true;
2352
2353     // See if we can simplify either vselect operand.
2354     APInt DemandedLHS(DemandedElts);
2355     APInt DemandedRHS(DemandedElts);
2356     APInt UndefLHS, ZeroLHS;
2357     APInt UndefRHS, ZeroRHS;
2358     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
2359                                    ZeroLHS, TLO, Depth + 1))
2360       return true;
2361     if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
2362                                    ZeroRHS, TLO, Depth + 1))
2363       return true;
2364
2365     KnownUndef = UndefLHS & UndefRHS;
2366     KnownZero = ZeroLHS & ZeroRHS;
2367     break;
2368   }
2369   case ISD::VECTOR_SHUFFLE: {
2370     ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
2371
2372     // Collect demanded elements from shuffle operands..
2373     APInt DemandedLHS(NumElts, 0);
2374     APInt DemandedRHS(NumElts, 0);
2375     for (unsigned i = 0; i != NumElts; ++i) {
2376       int M = ShuffleMask[i];
2377       if (M < 0 || !DemandedElts[i])
2378         continue;
2379       assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
2380       if (M < (int)NumElts)
2381         DemandedLHS.setBit(M);
2382       else
2383         DemandedRHS.setBit(M - NumElts);
2384     }
2385
2386     // See if we can simplify either shuffle operand.
2387     APInt UndefLHS, ZeroLHS;
2388     APInt UndefRHS, ZeroRHS;
2389     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
2390                                    ZeroLHS, TLO, Depth + 1))
2391       return true;
2392     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
2393                                    ZeroRHS, TLO, Depth + 1))
2394       return true;
2395
2396     // Simplify mask using undef elements from LHS/RHS.
2397     bool Updated = false;
2398     bool IdentityLHS = true, IdentityRHS = true;
2399     SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
2400     for (unsigned i = 0; i != NumElts; ++i) {
2401       int &M = NewMask[i];
2402       if (M < 0)
2403         continue;
2404       if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
2405           (M >= (int)NumElts && UndefRHS[M - NumElts])) {
2406         Updated = true;
2407         M = -1;
2408       }
2409       IdentityLHS &= (M < 0) || (M == (int)i);
2410       IdentityRHS &= (M < 0) || ((M - NumElts) == i);
2411     }
2412
2413     // Update legal shuffle masks based on demanded elements if it won't reduce
2414     // to Identity which can cause premature removal of the shuffle mask.
2415     if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
2416       SDValue LegalShuffle =
2417           buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
2418                                   NewMask, TLO.DAG);
2419       if (LegalShuffle)
2420         return TLO.CombineTo(Op, LegalShuffle);
2421     }
2422
2423     // Propagate undef/zero elements from LHS/RHS.
2424     for (unsigned i = 0; i != NumElts; ++i) {
2425       int M = ShuffleMask[i];
2426       if (M < 0) {
2427         KnownUndef.setBit(i);
2428       } else if (M < (int)NumElts) {
2429         if (UndefLHS[M])
2430           KnownUndef.setBit(i);
2431         if (ZeroLHS[M])
2432           KnownZero.setBit(i);
2433       } else {
2434         if (UndefRHS[M - NumElts])
2435           KnownUndef.setBit(i);
2436         if (ZeroRHS[M - NumElts])
2437           KnownZero.setBit(i);
2438       }
2439     }
2440     break;
2441   }
2442   case ISD::ANY_EXTEND_VECTOR_INREG:
2443   case ISD::SIGN_EXTEND_VECTOR_INREG:
2444   case ISD::ZERO_EXTEND_VECTOR_INREG: {
2445     APInt SrcUndef, SrcZero;
2446     SDValue Src = Op.getOperand(0);
2447     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
2448     APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
2449     if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
2450                                    Depth + 1))
2451       return true;
2452     KnownZero = SrcZero.zextOrTrunc(NumElts);
2453     KnownUndef = SrcUndef.zextOrTrunc(NumElts);
2454
2455     if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
2456         Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
2457         DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
2458       // aext - if we just need the bottom element then we can bitcast.
2459       return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2460     }
2461
2462     if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
2463       // zext(undef) upper bits are guaranteed to be zero.
2464       if (DemandedElts.isSubsetOf(KnownUndef))
2465         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2466       KnownUndef.clearAllBits();
2467     }
2468     break;
2469   }
2470
2471   // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
2472   // MAX, saturated math, etc.
2473   case ISD::OR:
2474   case ISD::XOR:
2475   case ISD::ADD:
2476   case ISD::SUB:
2477   case ISD::FADD:
2478   case ISD::FSUB:
2479   case ISD::FMUL:
2480   case ISD::FDIV:
2481   case ISD::FREM: {
2482     APInt UndefRHS, ZeroRHS;
2483     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2484                                    ZeroRHS, TLO, Depth + 1))
2485       return true;
2486     APInt UndefLHS, ZeroLHS;
2487     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2488                                    ZeroLHS, TLO, Depth + 1))
2489       return true;
2490
2491     KnownZero = ZeroLHS & ZeroRHS;
2492     KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
2493     break;
2494   }
2495   case ISD::SHL:
2496   case ISD::SRL:
2497   case ISD::SRA:
2498   case ISD::ROTL:
2499   case ISD::ROTR: {
2500     APInt UndefRHS, ZeroRHS;
2501     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
2502                                    ZeroRHS, TLO, Depth + 1))
2503       return true;
2504     APInt UndefLHS, ZeroLHS;
2505     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
2506                                    ZeroLHS, TLO, Depth + 1))
2507       return true;
2508
2509     KnownZero = ZeroLHS;
2510     KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
2511     break;
2512   }
2513   case ISD::MUL:
2514   case ISD::AND: {
2515     APInt SrcUndef, SrcZero;
2516     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
2517                                    SrcZero, TLO, Depth + 1))
2518       return true;
2519     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2520                                    KnownZero, TLO, Depth + 1))
2521       return true;
2522
2523     // If either side has a zero element, then the result element is zero, even
2524     // if the other is an UNDEF.
2525     // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
2526     // and then handle 'and' nodes with the rest of the binop opcodes.
2527     KnownZero |= SrcZero;
2528     KnownUndef &= SrcUndef;
2529     KnownUndef &= ~KnownZero;
2530     break;
2531   }
2532   case ISD::TRUNCATE:
2533   case ISD::SIGN_EXTEND:
2534   case ISD::ZERO_EXTEND:
2535     if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
2536                                    KnownZero, TLO, Depth + 1))
2537       return true;
2538
2539     if (Op.getOpcode() == ISD::ZERO_EXTEND) {
2540       // zext(undef) upper bits are guaranteed to be zero.
2541       if (DemandedElts.isSubsetOf(KnownUndef))
2542         return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
2543       KnownUndef.clearAllBits();
2544     }
2545     break;
2546   default: {
2547     if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
2548       if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
2549                                                   KnownZero, TLO, Depth))
2550         return true;
2551     } else {
2552       KnownBits Known;
2553       APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
2554       if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
2555                                TLO, Depth, AssumeSingleUse))
2556         return true;
2557     }
2558     break;
2559   }
2560   }
2561   assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
2562
2563   // Constant fold all undef cases.
2564   // TODO: Handle zero cases as well.
2565   if (DemandedElts.isSubsetOf(KnownUndef))
2566     return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2567
2568   return false;
2569 }
2570
2571 /// Determine which of the bits specified in Mask are known to be either zero or
2572 /// one and return them in the Known.
2573 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2574                                                    KnownBits &Known,
2575                                                    const APInt &DemandedElts,
2576                                                    const SelectionDAG &DAG,
2577                                                    unsigned Depth) const {
2578   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2579           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2580           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2581           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2582          "Should use MaskedValueIsZero if you don't know whether Op"
2583          " is a target node!");
2584   Known.resetAll();
2585 }
2586
2587 void TargetLowering::computeKnownBitsForTargetInstr(
2588     Register R, KnownBits &Known, const APInt &DemandedElts,
2589     const MachineRegisterInfo &MRI, unsigned Depth) const {
2590   Known.resetAll();
2591 }
2592
2593 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
2594                                                    KnownBits &Known,
2595                                                    const APInt &DemandedElts,
2596                                                    const SelectionDAG &DAG,
2597                                                    unsigned Depth) const {
2598   assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
2599
2600   if (unsigned Align = DAG.InferPtrAlignment(Op)) {
2601     // The low bits are known zero if the pointer is aligned.
2602     Known.Zero.setLowBits(Log2_32(Align));
2603   }
2604 }
2605
2606 /// This method can be implemented by targets that want to expose additional
2607 /// information about sign bits to the DAG Combiner.
2608 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2609                                                          const APInt &,
2610                                                          const SelectionDAG &,
2611                                                          unsigned Depth) const {
2612   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2613           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2614           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2615           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2616          "Should use ComputeNumSignBits if you don't know whether Op"
2617          " is a target node!");
2618   return 1;
2619 }
2620
2621 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2622     SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
2623     TargetLoweringOpt &TLO, unsigned Depth) const {
2624   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2625           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2626           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2627           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2628          "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2629          " is a target node!");
2630   return false;
2631 }
2632
2633 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2634     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2635     KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
2636   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2637           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2638           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2639           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2640          "Should use SimplifyDemandedBits if you don't know whether Op"
2641          " is a target node!");
2642   computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
2643   return false;
2644 }
2645
2646 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
2647     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2648     SelectionDAG &DAG, unsigned Depth) const {
2649   assert(
2650       (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2651        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2652        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2653        Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2654       "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
2655       " is a target node!");
2656   return SDValue();
2657 }
2658
2659 SDValue
2660 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
2661                                         SDValue N1, MutableArrayRef<int> Mask,
2662                                         SelectionDAG &DAG) const {
2663   bool LegalMask = isShuffleMaskLegal(Mask, VT);
2664   if (!LegalMask) {
2665     std::swap(N0, N1);
2666     ShuffleVectorSDNode::commuteMask(Mask);
2667     LegalMask = isShuffleMaskLegal(Mask, VT);
2668   }
2669
2670   if (!LegalMask)
2671     return SDValue();
2672
2673   return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
2674 }
2675
2676 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
2677   return nullptr;
2678 }
2679
2680 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
2681                                                   const SelectionDAG &DAG,
2682                                                   bool SNaN,
2683                                                   unsigned Depth) const {
2684   assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2685           Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2686           Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
2687           Op.getOpcode() == ISD::INTRINSIC_VOID) &&
2688          "Should use isKnownNeverNaN if you don't know whether Op"
2689          " is a target node!");
2690   return false;
2691 }
2692
2693 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2694 // work with truncating build vectors and vectors with elements of less than
2695 // 8 bits.
2696 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
2697   if (!N)
2698     return false;
2699
2700   APInt CVal;
2701   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
2702     CVal = CN->getAPIntValue();
2703   } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
2704     auto *CN = BV->getConstantSplatNode();
2705     if (!CN)
2706       return false;
2707
2708     // If this is a truncating build vector, truncate the splat value.
2709     // Otherwise, we may fail to match the expected values below.
2710     unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
2711     CVal = CN->getAPIntValue();
2712     if (BVEltWidth < CVal.getBitWidth())
2713       CVal = CVal.trunc(BVEltWidth);
2714   } else {
2715     return false;
2716   }
2717
2718   switch (getBooleanContents(N->getValueType(0))) {
2719   case UndefinedBooleanContent:
2720     return CVal[0];
2721   case ZeroOrOneBooleanContent:
2722     return CVal.isOneValue();
2723   case ZeroOrNegativeOneBooleanContent:
2724     return CVal.isAllOnesValue();
2725   }
2726
2727   llvm_unreachable("Invalid boolean contents");
2728 }
2729
2730 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
2731   if (!N)
2732     return false;
2733
2734   const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
2735   if (!CN) {
2736     const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
2737     if (!BV)
2738       return false;
2739
2740     // Only interested in constant splats, we don't care about undef
2741     // elements in identifying boolean constants and getConstantSplatNode
2742     // returns NULL if all ops are undef;
2743     CN = BV->getConstantSplatNode();
2744     if (!CN)
2745       return false;
2746   }
2747
2748   if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
2749     return !CN->getAPIntValue()[0];
2750
2751   return CN->isNullValue();
2752 }
2753
2754 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
2755                                        bool SExt) const {
2756   if (VT == MVT::i1)
2757     return N->isOne();
2758
2759   TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
2760   switch (Cnt) {
2761   case TargetLowering::ZeroOrOneBooleanContent:
2762     // An extended value of 1 is always true, unless its original type is i1,
2763     // in which case it will be sign extended to -1.
2764     return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
2765   case TargetLowering::UndefinedBooleanContent:
2766   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2767     return N->isAllOnesValue() && SExt;
2768   }
2769   llvm_unreachable("Unexpected enumeration.");
2770 }
2771
2772 /// This helper function of SimplifySetCC tries to optimize the comparison when
2773 /// either operand of the SetCC node is a bitwise-and instruction.
2774 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
2775                                          ISD::CondCode Cond, const SDLoc &DL,
2776                                          DAGCombinerInfo &DCI) const {
2777   // Match these patterns in any of their permutations:
2778   // (X & Y) == Y
2779   // (X & Y) != Y
2780   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
2781     std::swap(N0, N1);
2782
2783   EVT OpVT = N0.getValueType();
2784   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
2785       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
2786     return SDValue();
2787
2788   SDValue X, Y;
2789   if (N0.getOperand(0) == N1) {
2790     X = N0.getOperand(1);
2791     Y = N0.getOperand(0);
2792   } else if (N0.getOperand(1) == N1) {
2793     X = N0.getOperand(0);
2794     Y = N0.getOperand(1);
2795   } else {
2796     return SDValue();
2797   }
2798
2799   SelectionDAG &DAG = DCI.DAG;
2800   SDValue Zero = DAG.getConstant(0, DL, OpVT);
2801   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
2802     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
2803     // Note that where Y is variable and is known to have at most one bit set
2804     // (for example, if it is Z & 1) we cannot do this; the expressions are not
2805     // equivalent when Y == 0.
2806     Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
2807     if (DCI.isBeforeLegalizeOps() ||
2808         isCondCodeLegal(Cond, N0.getSimpleValueType()))
2809       return DAG.getSetCC(DL, VT, N0, Zero, Cond);
2810   } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
2811     // If the target supports an 'and-not' or 'and-complement' logic operation,
2812     // try to use that to make a comparison operation more efficient.
2813     // But don't do this transform if the mask is a single bit because there are
2814     // more efficient ways to deal with that case (for example, 'bt' on x86 or
2815     // 'rlwinm' on PPC).
2816
2817     // Bail out if the compare operand that we want to turn into a zero is
2818     // already a zero (otherwise, infinite loop).
2819     auto *YConst = dyn_cast<ConstantSDNode>(Y);
2820     if (YConst && YConst->isNullValue())
2821       return SDValue();
2822
2823     // Transform this into: ~X & Y == 0.
2824     SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
2825     SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
2826     return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
2827   }
2828
2829   return SDValue();
2830 }
2831
2832 /// There are multiple IR patterns that could be checking whether certain
2833 /// truncation of a signed number would be lossy or not. The pattern which is
2834 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
2835 /// We are looking for the following pattern: (KeptBits is a constant)
2836 ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
2837 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
2838 /// KeptBits also can't be 1, that would have been folded to  %x dstcond 0
2839 /// We will unfold it into the natural trunc+sext pattern:
2840 ///   ((%x << C) a>> C) dstcond %x
2841 /// Where  C = bitwidth(x) - KeptBits  and  C u< bitwidth(x)
2842 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
2843     EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
2844     const SDLoc &DL) const {
2845   // We must be comparing with a constant.
2846   ConstantSDNode *C1;
2847   if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
2848     return SDValue();
2849
2850   // N0 should be:  add %x, (1 << (KeptBits-1))
2851   if (N0->getOpcode() != ISD::ADD)
2852     return SDValue();
2853
2854   // And we must be 'add'ing a constant.
2855   ConstantSDNode *C01;
2856   if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
2857     return SDValue();
2858
2859   SDValue X = N0->getOperand(0);
2860   EVT XVT = X.getValueType();
2861
2862   // Validate constants ...
2863
2864   APInt I1 = C1->getAPIntValue();
2865
2866   ISD::CondCode NewCond;
2867   if (Cond == ISD::CondCode::SETULT) {
2868     NewCond = ISD::CondCode::SETEQ;
2869   } else if (Cond == ISD::CondCode::SETULE) {
2870     NewCond = ISD::CondCode::SETEQ;
2871     // But need to 'canonicalize' the constant.
2872     I1 += 1;
2873   } else if (Cond == ISD::CondCode::SETUGT) {
2874     NewCond = ISD::CondCode::SETNE;
2875     // But need to 'canonicalize' the constant.
2876     I1 += 1;
2877   } else if (Cond == ISD::CondCode::SETUGE) {
2878     NewCond = ISD::CondCode::SETNE;
2879   } else
2880     return SDValue();
2881
2882   APInt I01 = C01->getAPIntValue();
2883
2884   auto checkConstants = [&I1, &I01]() -> bool {
2885     // Both of them must be power-of-two, and the constant from setcc is bigger.
2886     return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
2887   };
2888
2889   if (checkConstants()) {
2890     // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
2891   } else {
2892     // What if we invert constants? (and the target predicate)
2893     I1.negate();
2894     I01.negate();
2895     NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
2896     if (!checkConstants())
2897       return SDValue();
2898     // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
2899   }
2900
2901   // They are power-of-two, so which bit is set?
2902   const unsigned KeptBits = I1.logBase2();
2903   const unsigned KeptBitsMinusOne = I01.logBase2();
2904
2905   // Magic!
2906   if (KeptBits != (KeptBitsMinusOne + 1))
2907     return SDValue();
2908   assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
2909
2910   // We don't want to do this in every single case.
2911   SelectionDAG &DAG = DCI.DAG;
2912   if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
2913           XVT, KeptBits))
2914     return SDValue();
2915
2916   const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
2917   assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
2918
2919   // Unfold into:  ((%x << C) a>> C) cond %x
2920   // Where 'cond' will be either 'eq' or 'ne'.
2921   SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2922   SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2923   SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2924   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2925
2926   return T2;
2927 }
2928
2929 // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
2930 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
2931     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
2932     DAGCombinerInfo &DCI, const SDLoc &DL) const {
2933   assert(isConstOrConstSplat(N1C) &&
2934          isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
2935          "Should be a comparison with 0.");
2936   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2937          "Valid only for [in]equality comparisons.");
2938
2939   unsigned NewShiftOpcode;
2940   SDValue X, C, Y;
2941
2942   SelectionDAG &DAG = DCI.DAG;
2943   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2944
2945   // Look for '(C l>>/<< Y)'.
2946   auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
2947     // The shift should be one-use.
2948     if (!V.hasOneUse())
2949       return false;
2950     unsigned OldShiftOpcode = V.getOpcode();
2951     switch (OldShiftOpcode) {
2952     case ISD::SHL:
2953       NewShiftOpcode = ISD::SRL;
2954       break;
2955     case ISD::SRL:
2956       NewShiftOpcode = ISD::SHL;
2957       break;
2958     default:
2959       return false; // must be a logical shift.
2960     }
2961     // We should be shifting a constant.
2962     // FIXME: best to use isConstantOrConstantVector().
2963     C = V.getOperand(0);
2964     ConstantSDNode *CC =
2965         isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2966     if (!CC)
2967       return false;
2968     Y = V.getOperand(1);
2969
2970     ConstantSDNode *XC =
2971         isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
2972     return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
2973         X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
2974   };
2975
2976   // LHS of comparison should be an one-use 'and'.
2977   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
2978     return SDValue();
2979
2980   X = N0.getOperand(0);
2981   SDValue Mask = N0.getOperand(1);
2982
2983   // 'and' is commutative!
2984   if (!Match(Mask)) {
2985     std::swap(X, Mask);
2986     if (!Match(Mask))
2987       return SDValue();
2988   }
2989
2990   EVT VT = X.getValueType();
2991
2992   // Produce:
2993   // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
2994   SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
2995   SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
2996   SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
2997   return T2;
2998 }
2999
3000 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3001 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3002 /// handle the commuted versions of these patterns.
3003 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3004                                            ISD::CondCode Cond, const SDLoc &DL,
3005                                            DAGCombinerInfo &DCI) const {
3006   unsigned BOpcode = N0.getOpcode();
3007   assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3008          "Unexpected binop");
3009   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3010
3011   // (X + Y) == X --> Y == 0
3012   // (X - Y) == X --> Y == 0
3013   // (X ^ Y) == X --> Y == 0
3014   SelectionDAG &DAG = DCI.DAG;
3015   EVT OpVT = N0.getValueType();
3016   SDValue X = N0.getOperand(0);
3017   SDValue Y = N0.getOperand(1);
3018   if (X == N1)
3019     return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
3020
3021   if (Y != N1)
3022     return SDValue();
3023
3024   // (X + Y) == Y --> X == 0
3025   // (X ^ Y) == Y --> X == 0
3026   if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
3027     return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
3028
3029   // The shift would not be valid if the operands are boolean (i1).
3030   if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
3031     return SDValue();
3032
3033   // (X - Y) == Y --> X == Y << 1
3034   EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
3035                                  !DCI.isBeforeLegalize());
3036   SDValue One = DAG.getConstant(1, DL, ShiftVT);
3037   SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
3038   if (!DCI.isCalledByLegalizer())
3039     DCI.AddToWorklist(YShl1.getNode());
3040   return DAG.getSetCC(DL, VT, X, YShl1, Cond);
3041 }
3042
3043 /// Try to simplify a setcc built with the specified operands and cc. If it is
3044 /// unable to simplify it, return a null SDValue.
3045 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
3046                                       ISD::CondCode Cond, bool foldBooleans,
3047                                       DAGCombinerInfo &DCI,
3048                                       const SDLoc &dl) const {
3049   SelectionDAG &DAG = DCI.DAG;
3050   EVT OpVT = N0.getValueType();
3051
3052   // Constant fold or commute setcc.
3053   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
3054     return Fold;
3055
3056   // Ensure that the constant occurs on the RHS and fold constant comparisons.
3057   // TODO: Handle non-splat vector constants. All undef causes trouble.
3058   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
3059   if (isConstOrConstSplat(N0) &&
3060       (DCI.isBeforeLegalizeOps() ||
3061        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
3062     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3063
3064   // If we have a subtract with the same 2 non-constant operands as this setcc
3065   // -- but in reverse order -- then try to commute the operands of this setcc
3066   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
3067   // instruction on some targets.
3068   if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
3069       (DCI.isBeforeLegalizeOps() ||
3070        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
3071       DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) &&
3072       !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } ))
3073     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
3074
3075   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3076     const APInt &C1 = N1C->getAPIntValue();
3077
3078     // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
3079     // equality comparison, then we're just comparing whether X itself is
3080     // zero.
3081     if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
3082         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
3083         N0.getOperand(1).getOpcode() == ISD::Constant) {
3084       const APInt &ShAmt = N0.getConstantOperandAPInt(1);
3085       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3086           ShAmt == Log2_32(N0.getValueSizeInBits())) {
3087         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
3088           // (srl (ctlz x), 5) == 0  -> X != 0
3089           // (srl (ctlz x), 5) != 1  -> X != 0
3090           Cond = ISD::SETNE;
3091         } else {
3092           // (srl (ctlz x), 5) != 0  -> X == 0
3093           // (srl (ctlz x), 5) == 1  -> X == 0
3094           Cond = ISD::SETEQ;
3095         }
3096         SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
3097         return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
3098                             Zero, Cond);
3099       }
3100     }
3101
3102     SDValue CTPOP = N0;
3103     // Look through truncs that don't change the value of a ctpop.
3104     if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
3105       CTPOP = N0.getOperand(0);
3106
3107     if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
3108         (N0 == CTPOP ||
3109          N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
3110       EVT CTVT = CTPOP.getValueType();
3111       SDValue CTOp = CTPOP.getOperand(0);
3112
3113       // (ctpop x) u< 2 -> (x & x-1) == 0
3114       // (ctpop x) u> 1 -> (x & x-1) != 0
3115       if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
3116         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3117         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3118         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3119         ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
3120         return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
3121       }
3122
3123       // If ctpop is not supported, expand a power-of-2 comparison based on it.
3124       if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
3125           (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3126         // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
3127         // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
3128         SDValue Zero = DAG.getConstant(0, dl, CTVT);
3129         SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
3130         ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
3131         SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
3132         SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
3133         SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
3134         SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
3135         unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
3136         return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
3137       }
3138     }
3139
3140     // (zext x) == C --> x == (trunc C)
3141     // (sext x) == C --> x == (trunc C)
3142     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3143         DCI.isBeforeLegalize() && N0->hasOneUse()) {
3144       unsigned MinBits = N0.getValueSizeInBits();
3145       SDValue PreExt;
3146       bool Signed = false;
3147       if (N0->getOpcode() == ISD::ZERO_EXTEND) {
3148         // ZExt
3149         MinBits = N0->getOperand(0).getValueSizeInBits();
3150         PreExt = N0->getOperand(0);
3151       } else if (N0->getOpcode() == ISD::AND) {
3152         // DAGCombine turns costly ZExts into ANDs
3153         if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
3154           if ((C->getAPIntValue()+1).isPowerOf2()) {
3155             MinBits = C->getAPIntValue().countTrailingOnes();
3156             PreExt = N0->getOperand(0);
3157           }
3158       } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
3159         // SExt
3160         MinBits = N0->getOperand(0).getValueSizeInBits();
3161         PreExt = N0->getOperand(0);
3162         Signed = true;
3163       } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
3164         // ZEXTLOAD / SEXTLOAD
3165         if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
3166           MinBits = LN0->getMemoryVT().getSizeInBits();
3167           PreExt = N0;
3168         } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
3169           Signed = true;
3170           MinBits = LN0->getMemoryVT().getSizeInBits();
3171           PreExt = N0;
3172         }
3173       }
3174
3175       // Figure out how many bits we need to preserve this constant.
3176       unsigned ReqdBits = Signed ?
3177         C1.getBitWidth() - C1.getNumSignBits() + 1 :
3178         C1.getActiveBits();
3179
3180       // Make sure we're not losing bits from the constant.
3181       if (MinBits > 0 &&
3182           MinBits < C1.getBitWidth() &&
3183           MinBits >= ReqdBits) {
3184         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
3185         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
3186           // Will get folded away.
3187           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
3188           if (MinBits == 1 && C1 == 1)
3189             // Invert the condition.
3190             return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
3191                                 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3192           SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
3193           return DAG.getSetCC(dl, VT, Trunc, C, Cond);
3194         }
3195
3196         // If truncating the setcc operands is not desirable, we can still
3197         // simplify the expression in some cases:
3198         // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
3199         // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
3200         // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
3201         // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
3202         // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
3203         // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
3204         SDValue TopSetCC = N0->getOperand(0);
3205         unsigned N0Opc = N0->getOpcode();
3206         bool SExt = (N0Opc == ISD::SIGN_EXTEND);
3207         if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
3208             TopSetCC.getOpcode() == ISD::SETCC &&
3209             (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3210             (isConstFalseVal(N1C) ||
3211              isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
3212
3213           bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
3214                          (!N1C->isNullValue() && Cond == ISD::SETNE);
3215
3216           if (!Inverse)
3217             return TopSetCC;
3218
3219           ISD::CondCode InvCond = ISD::getSetCCInverse(
3220               cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
3221               TopSetCC.getOperand(0).getValueType().isInteger());
3222           return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
3223                                       TopSetCC.getOperand(1),
3224                                       InvCond);
3225         }
3226       }
3227     }
3228
3229     // If the LHS is '(and load, const)', the RHS is 0, the test is for
3230     // equality or unsigned, and all 1 bits of the const are in the same
3231     // partial word, see if we can shorten the load.
3232     if (DCI.isBeforeLegalize() &&
3233         !ISD::isSignedIntSetCC(Cond) &&
3234         N0.getOpcode() == ISD::AND && C1 == 0 &&
3235         N0.getNode()->hasOneUse() &&
3236         isa<LoadSDNode>(N0.getOperand(0)) &&
3237         N0.getOperand(0).getNode()->hasOneUse() &&
3238         isa<ConstantSDNode>(N0.getOperand(1))) {
3239       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
3240       APInt bestMask;
3241       unsigned bestWidth = 0, bestOffset = 0;
3242       if (Lod->isSimple() && Lod->isUnindexed()) {
3243         unsigned origWidth = N0.getValueSizeInBits();
3244         unsigned maskWidth = origWidth;
3245         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
3246         // 8 bits, but have to be careful...
3247         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
3248           origWidth = Lod->getMemoryVT().getSizeInBits();
3249         const APInt &Mask = N0.getConstantOperandAPInt(1);
3250         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
3251           APInt newMask = APInt::getLowBitsSet(maskWidth, width);
3252           for (unsigned offset=0; offset<origWidth/width; offset++) {
3253             if (Mask.isSubsetOf(newMask)) {
3254               if (DAG.getDataLayout().isLittleEndian())
3255                 bestOffset = (uint64_t)offset * (width/8);
3256               else
3257                 bestOffset = (origWidth/width - offset - 1) * (width/8);
3258               bestMask = Mask.lshr(offset * (width/8) * 8);
3259               bestWidth = width;
3260               break;
3261             }
3262             newMask <<= width;
3263           }
3264         }
3265       }
3266       if (bestWidth) {
3267         EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
3268         if (newVT.isRound() &&
3269             shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
3270           EVT PtrType = Lod->getOperand(1).getValueType();
3271           SDValue Ptr = Lod->getBasePtr();
3272           if (bestOffset != 0)
3273             Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
3274                               DAG.getConstant(bestOffset, dl, PtrType));
3275           unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
3276           SDValue NewLoad = DAG.getLoad(
3277               newVT, dl, Lod->getChain(), Ptr,
3278               Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
3279           return DAG.getSetCC(dl, VT,
3280                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
3281                                       DAG.getConstant(bestMask.trunc(bestWidth),
3282                                                       dl, newVT)),
3283                               DAG.getConstant(0LL, dl, newVT), Cond);
3284         }
3285       }
3286     }
3287
3288     // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
3289     if (N0.getOpcode() == ISD::ZERO_EXTEND) {
3290       unsigned InSize = N0.getOperand(0).getValueSizeInBits();
3291
3292       // If the comparison constant has bits in the upper part, the
3293       // zero-extended value could never match.
3294       if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
3295                                               C1.getBitWidth() - InSize))) {
3296         switch (Cond) {
3297         case ISD::SETUGT:
3298         case ISD::SETUGE:
3299         case ISD::SETEQ:
3300           return DAG.getConstant(0, dl, VT);
3301         case ISD::SETULT:
3302         case ISD::SETULE:
3303         case ISD::SETNE:
3304           return DAG.getConstant(1, dl, VT);
3305         case ISD::SETGT:
3306         case ISD::SETGE:
3307           // True if the sign bit of C1 is set.
3308           return DAG.getConstant(C1.isNegative(), dl, VT);
3309         case ISD::SETLT:
3310         case ISD::SETLE:
3311           // True if the sign bit of C1 isn't set.
3312           return DAG.getConstant(C1.isNonNegative(), dl, VT);
3313         default:
3314           break;
3315         }
3316       }
3317
3318       // Otherwise, we can perform the comparison with the low bits.
3319       switch (Cond) {
3320       case ISD::SETEQ:
3321       case ISD::SETNE:
3322       case ISD::SETUGT:
3323       case ISD::SETUGE:
3324       case ISD::SETULT:
3325       case ISD::SETULE: {
3326         EVT newVT = N0.getOperand(0).getValueType();
3327         if (DCI.isBeforeLegalizeOps() ||
3328             (isOperationLegal(ISD::SETCC, newVT) &&
3329              isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
3330           EVT NewSetCCVT =
3331               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
3332           SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
3333
3334           SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
3335                                           NewConst, Cond);
3336           return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
3337         }
3338         break;
3339       }
3340       default:
3341         break; // todo, be more careful with signed comparisons
3342       }
3343     } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3344                (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3345       EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
3346       unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
3347       EVT ExtDstTy = N0.getValueType();
3348       unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
3349
3350       // If the constant doesn't fit into the number of bits for the source of
3351       // the sign extension, it is impossible for both sides to be equal.
3352       if (C1.getMinSignedBits() > ExtSrcTyBits)
3353         return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
3354
3355       SDValue ZextOp;
3356       EVT Op0Ty = N0.getOperand(0).getValueType();
3357       if (Op0Ty == ExtSrcTy) {
3358         ZextOp = N0.getOperand(0);
3359       } else {
3360         APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
3361         ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
3362                              DAG.getConstant(Imm, dl, Op0Ty));
3363       }
3364       if (!DCI.isCalledByLegalizer())
3365         DCI.AddToWorklist(ZextOp.getNode());
3366       // Otherwise, make this a use of a zext.
3367       return DAG.getSetCC(dl, VT, ZextOp,
3368                           DAG.getConstant(C1 & APInt::getLowBitsSet(
3369                                                               ExtDstTyBits,
3370                                                               ExtSrcTyBits),
3371                                           dl, ExtDstTy),
3372                           Cond);
3373     } else if ((N1C->isNullValue() || N1C->isOne()) &&
3374                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3375       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
3376       if (N0.getOpcode() == ISD::SETCC &&
3377           isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
3378         bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
3379         if (TrueWhenTrue)
3380           return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
3381         // Invert the condition.
3382         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
3383         CC = ISD::getSetCCInverse(CC,
3384                                   N0.getOperand(0).getValueType().isInteger());
3385         if (DCI.isBeforeLegalizeOps() ||
3386             isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
3387           return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
3388       }
3389
3390       if ((N0.getOpcode() == ISD::XOR ||
3391            (N0.getOpcode() == ISD::AND &&
3392             N0.getOperand(0).getOpcode() == ISD::XOR &&
3393             N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
3394           isa<ConstantSDNode>(N0.getOperand(1)) &&
3395           cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
3396         // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
3397         // can only do this if the top bits are known zero.
3398         unsigned BitWidth = N0.getValueSizeInBits();
3399         if (DAG.MaskedValueIsZero(N0,
3400                                   APInt::getHighBitsSet(BitWidth,
3401                                                         BitWidth-1))) {
3402           // Okay, get the un-inverted input value.
3403           SDValue Val;
3404           if (N0.getOpcode() == ISD::XOR) {
3405             Val = N0.getOperand(0);
3406           } else {
3407             assert(N0.getOpcode() == ISD::AND &&
3408                     N0.getOperand(0).getOpcode() == ISD::XOR);
3409             // ((X^1)&1)^1 -> X & 1
3410             Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
3411                               N0.getOperand(0).getOperand(0),
3412                               N0.getOperand(1));
3413           }
3414
3415           return DAG.getSetCC(dl, VT, Val, N1,
3416                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3417         }
3418       } else if (N1C->isOne() &&
3419                  (VT == MVT::i1 ||
3420                   getBooleanContents(N0->getValueType(0)) ==
3421                       ZeroOrOneBooleanContent)) {
3422         SDValue Op0 = N0;
3423         if (Op0.getOpcode() == ISD::TRUNCATE)
3424           Op0 = Op0.getOperand(0);
3425
3426         if ((Op0.getOpcode() == ISD::XOR) &&
3427             Op0.getOperand(0).getOpcode() == ISD::SETCC &&
3428             Op0.getOperand(1).getOpcode() == ISD::SETCC) {
3429           // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
3430           Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
3431           return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
3432                               Cond);
3433         }
3434         if (Op0.getOpcode() == ISD::AND &&
3435             isa<ConstantSDNode>(Op0.getOperand(1)) &&
3436             cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
3437           // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
3438           if (Op0.getValueType().bitsGT(VT))
3439             Op0 = DAG.getNode(ISD::AND, dl, VT,
3440                           DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
3441                           DAG.getConstant(1, dl, VT));
3442           else if (Op0.getValueType().bitsLT(VT))
3443             Op0 = DAG.getNode(ISD::AND, dl, VT,
3444                         DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
3445                         DAG.getConstant(1, dl, VT));
3446
3447           return DAG.getSetCC(dl, VT, Op0,
3448                               DAG.getConstant(0, dl, Op0.getValueType()),
3449                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3450         }
3451         if (Op0.getOpcode() == ISD::AssertZext &&
3452             cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
3453           return DAG.getSetCC(dl, VT, Op0,
3454                               DAG.getConstant(0, dl, Op0.getValueType()),
3455                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
3456       }
3457     }
3458
3459     // Given:
3460     //   icmp eq/ne (urem %x, %y), 0
3461     // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
3462     //   icmp eq/ne %x, 0
3463     if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
3464         (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3465       KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
3466       KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
3467       if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
3468         return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
3469     }
3470
3471     if (SDValue V =
3472             optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
3473       return V;
3474   }
3475
3476   // These simplifications apply to splat vectors as well.
3477   // TODO: Handle more splat vector cases.
3478   if (auto *N1C = isConstOrConstSplat(N1)) {
3479     const APInt &C1 = N1C->getAPIntValue();
3480
3481     APInt MinVal, MaxVal;
3482     unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
3483     if (ISD::isSignedIntSetCC(Cond)) {
3484       MinVal = APInt::getSignedMinValue(OperandBitSize);
3485       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
3486     } else {
3487       MinVal = APInt::getMinValue(OperandBitSize);
3488       MaxVal = APInt::getMaxValue(OperandBitSize);
3489     }
3490
3491     // Canonicalize GE/LE comparisons to use GT/LT comparisons.
3492     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
3493       // X >= MIN --> true
3494       if (C1 == MinVal)
3495         return DAG.getBoolConstant(true, dl, VT, OpVT);
3496
3497       if (!VT.isVector()) { // TODO: Support this for vectors.
3498         // X >= C0 --> X > (C0 - 1)
3499         APInt C = C1 - 1;
3500         ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
3501         if ((DCI.isBeforeLegalizeOps() ||
3502              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3503             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3504                                   isLegalICmpImmediate(C.getSExtValue())))) {
3505           return DAG.getSetCC(dl, VT, N0,
3506                               DAG.getConstant(C, dl, N1.getValueType()),
3507                               NewCC);
3508         }
3509       }
3510     }
3511
3512     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
3513       // X <= MAX --> true
3514       if (C1 == MaxVal)
3515         return DAG.getBoolConstant(true, dl, VT, OpVT);
3516
3517       // X <= C0 --> X < (C0 + 1)
3518       if (!VT.isVector()) { // TODO: Support this for vectors.
3519         APInt C = C1 + 1;
3520         ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
3521         if ((DCI.isBeforeLegalizeOps() ||
3522              isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
3523             (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
3524                                   isLegalICmpImmediate(C.getSExtValue())))) {
3525           return DAG.getSetCC(dl, VT, N0,
3526                               DAG.getConstant(C, dl, N1.getValueType()),
3527                               NewCC);
3528         }
3529       }
3530     }
3531
3532     if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
3533       if (C1 == MinVal)
3534         return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
3535
3536       // TODO: Support this for vectors after legalize ops.
3537       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3538         // Canonicalize setlt X, Max --> setne X, Max
3539         if (C1 == MaxVal)
3540           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3541
3542         // If we have setult X, 1, turn it into seteq X, 0
3543         if (C1 == MinVal+1)
3544           return DAG.getSetCC(dl, VT, N0,
3545                               DAG.getConstant(MinVal, dl, N0.getValueType()),
3546                               ISD::SETEQ);
3547       }
3548     }
3549
3550     if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
3551       if (C1 == MaxVal)
3552         return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
3553
3554       // TODO: Support this for vectors after legalize ops.
3555       if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3556         // Canonicalize setgt X, Min --> setne X, Min
3557         if (C1 == MinVal)
3558           return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
3559
3560         // If we have setugt X, Max-1, turn it into seteq X, Max
3561         if (C1 == MaxVal-1)
3562           return DAG.getSetCC(dl, VT, N0,
3563                               DAG.getConstant(MaxVal, dl, N0.getValueType()),
3564                               ISD::SETEQ);
3565       }
3566     }
3567
3568     if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
3569       // (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
3570       if (C1.isNullValue())
3571         if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
3572                 VT, N0, N1, Cond, DCI, dl))
3573           return CC;
3574     }
3575
3576     // If we have "setcc X, C0", check to see if we can shrink the immediate
3577     // by changing cc.
3578     // TODO: Support this for vectors after legalize ops.
3579     if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
3580       // SETUGT X, SINTMAX  -> SETLT X, 0
3581       if (Cond == ISD::SETUGT &&
3582           C1 == APInt::getSignedMaxValue(OperandBitSize))
3583         return DAG.getSetCC(dl, VT, N0,
3584                             DAG.getConstant(0, dl, N1.getValueType()),
3585                             ISD::SETLT);
3586
3587       // SETULT X, SINTMIN  -> SETGT X, -1
3588       if (Cond == ISD::SETULT &&
3589           C1 == APInt::getSignedMinValue(OperandBitSize)) {
3590         SDValue ConstMinusOne =
3591             DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
3592                             N1.getValueType());
3593         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
3594       }
3595     }
3596   }
3597
3598   // Back to non-vector simplifications.
3599   // TODO: Can we do these for vector splats?
3600   if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
3601     const APInt &C1 = N1C->getAPIntValue();
3602
3603     // Fold bit comparisons when we can.
3604     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3605         (VT == N0.getValueType() ||
3606          (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
3607         N0.getOpcode() == ISD::AND) {
3608       auto &DL = DAG.getDataLayout();
3609       if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3610         EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3611                                        !DCI.isBeforeLegalize());
3612         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
3613           // Perform the xform if the AND RHS is a single bit.
3614           if (AndRHS->getAPIntValue().isPowerOf2()) {
3615             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3616                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
3617                    DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
3618                                    ShiftTy)));
3619           }
3620         } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
3621           // (X & 8) == 8  -->  (X & 8) >> 3
3622           // Perform the xform if C1 is a single bit.
3623           if (C1.isPowerOf2()) {
3624             return DAG.getNode(ISD::TRUNCATE, dl, VT,
3625                                DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
3626                                       DAG.getConstant(C1.logBase2(), dl,
3627                                                       ShiftTy)));
3628           }
3629         }
3630       }
3631     }
3632
3633     if (C1.getMinSignedBits() <= 64 &&
3634         !isLegalICmpImmediate(C1.getSExtValue())) {
3635       // (X & -256) == 256 -> (X >> 8) == 1
3636       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3637           N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
3638         if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3639           const APInt &AndRHSC = AndRHS->getAPIntValue();
3640           if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
3641             unsigned ShiftBits = AndRHSC.countTrailingZeros();
3642             auto &DL = DAG.getDataLayout();
3643             EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3644                                            !DCI.isBeforeLegalize());
3645             EVT CmpTy = N0.getValueType();
3646             SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
3647                                         DAG.getConstant(ShiftBits, dl,
3648                                                         ShiftTy));
3649             SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
3650             return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
3651           }
3652         }
3653       } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
3654                  Cond == ISD::SETULE || Cond == ISD::SETUGT) {
3655         bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
3656         // X <  0x100000000 -> (X >> 32) <  1
3657         // X >= 0x100000000 -> (X >> 32) >= 1
3658         // X <= 0x0ffffffff -> (X >> 32) <  1
3659         // X >  0x0ffffffff -> (X >> 32) >= 1
3660         unsigned ShiftBits;
3661         APInt NewC = C1;
3662         ISD::CondCode NewCond = Cond;
3663         if (AdjOne) {
3664           ShiftBits = C1.countTrailingOnes();
3665           NewC = NewC + 1;
3666           NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3667         } else {
3668           ShiftBits = C1.countTrailingZeros();
3669         }
3670         NewC.lshrInPlace(ShiftBits);
3671         if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
3672           isLegalICmpImmediate(NewC.getSExtValue())) {
3673           auto &DL = DAG.getDataLayout();
3674           EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
3675                                          !DCI.isBeforeLegalize());
3676           EVT CmpTy = N0.getValueType();
3677           SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
3678                                       DAG.getConstant(ShiftBits, dl, ShiftTy));
3679           SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
3680           return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
3681         }
3682       }
3683     }
3684   }
3685
3686   if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
3687     auto *CFP = cast<ConstantFPSDNode>(N1);
3688     assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
3689
3690     // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
3691     // constant if knowing that the operand is non-nan is enough.  We prefer to
3692     // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
3693     // materialize 0.0.
3694     if (Cond == ISD::SETO || Cond == ISD::SETUO)
3695       return DAG.getSetCC(dl, VT, N0, N0, Cond);
3696
3697     // setcc (fneg x), C -> setcc swap(pred) x, -C
3698     if (N0.getOpcode() == ISD::FNEG) {
3699       ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
3700       if (DCI.isBeforeLegalizeOps() ||
3701           isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
3702         SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
3703         return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
3704       }
3705     }
3706
3707     // If the condition is not legal, see if we can find an equivalent one
3708     // which is legal.
3709     if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
3710       // If the comparison was an awkward floating-point == or != and one of
3711       // the comparison operands is infinity or negative infinity, convert the
3712       // condition to a less-awkward <= or >=.
3713       if (CFP->getValueAPF().isInfinity()) {
3714         if (CFP->getValueAPF().isNegative()) {
3715           if (Cond == ISD::SETOEQ &&
3716               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3717             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
3718           if (Cond == ISD::SETUEQ &&
3719               isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
3720             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
3721           if (Cond == ISD::SETUNE &&
3722               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3723             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
3724           if (Cond == ISD::SETONE &&
3725               isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
3726             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
3727         } else {
3728           if (Cond == ISD::SETOEQ &&
3729               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3730             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
3731           if (Cond == ISD::SETUEQ &&
3732               isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
3733             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
3734           if (Cond == ISD::SETUNE &&
3735               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3736             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
3737           if (Cond == ISD::SETONE &&
3738               isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
3739             return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
3740         }
3741       }
3742     }
3743   }
3744
3745   if (N0 == N1) {
3746     // The sext(setcc()) => setcc() optimization relies on the appropriate
3747     // constant being emitted.
3748     assert(!N0.getValueType().isInteger() &&
3749            "Integer types should be handled by FoldSetCC");
3750
3751     bool EqTrue = ISD::isTrueWhenEqual(Cond);
3752     unsigned UOF = ISD::getUnorderedFlavor(Cond);
3753     if (UOF == 2) // FP operators that are undefined on NaNs.
3754       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3755     if (UOF == unsigned(EqTrue))
3756       return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
3757     // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
3758     // if it is not already.
3759     ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
3760     if (NewCond != Cond &&
3761         (DCI.isBeforeLegalizeOps() ||
3762                             isCondCodeLegal(NewCond, N0.getSimpleValueType())))
3763       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
3764   }
3765
3766   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3767       N0.getValueType().isInteger()) {
3768     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
3769         N0.getOpcode() == ISD::XOR) {
3770       // Simplify (X+Y) == (X+Z) -->  Y == Z
3771       if (N0.getOpcode() == N1.getOpcode()) {
3772         if (N0.getOperand(0) == N1.getOperand(0))
3773           return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
3774         if (N0.getOperand(1) == N1.getOperand(1))
3775           return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
3776         if (isCommutativeBinOp(N0.getOpcode())) {
3777           // If X op Y == Y op X, try other combinations.
3778           if (N0.getOperand(0) == N1.getOperand(1))
3779             return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
3780                                 Cond);
3781           if (N0.getOperand(1) == N1.getOperand(0))
3782             return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
3783                                 Cond);
3784         }
3785       }
3786
3787       // If RHS is a legal immediate value for a compare instruction, we need
3788       // to be careful about increasing register pressure needlessly.
3789       bool LegalRHSImm = false;
3790
3791       if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
3792         if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3793           // Turn (X+C1) == C2 --> X == C2-C1
3794           if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
3795             return DAG.getSetCC(dl, VT, N0.getOperand(0),
3796                                 DAG.getConstant(RHSC->getAPIntValue()-
3797                                                 LHSR->getAPIntValue(),
3798                                 dl, N0.getValueType()), Cond);
3799           }
3800
3801           // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3802           if (N0.getOpcode() == ISD::XOR)
3803             // If we know that all of the inverted bits are zero, don't bother
3804             // performing the inversion.
3805             if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
3806               return
3807                 DAG.getSetCC(dl, VT, N0.getOperand(0),
3808                              DAG.getConstant(LHSR->getAPIntValue() ^
3809                                                RHSC->getAPIntValue(),
3810                                              dl, N0.getValueType()),
3811                              Cond);
3812         }
3813
3814         // Turn (C1-X) == C2 --> X == C1-C2
3815         if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
3816           if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
3817             return
3818               DAG.getSetCC(dl, VT, N0.getOperand(1),
3819                            DAG.getConstant(SUBC->getAPIntValue() -
3820                                              RHSC->getAPIntValue(),
3821                                            dl, N0.getValueType()),
3822                            Cond);
3823           }
3824         }
3825
3826         // Could RHSC fold directly into a compare?
3827         if (RHSC->getValueType(0).getSizeInBits() <= 64)
3828           LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
3829       }
3830
3831       // (X+Y) == X --> Y == 0 and similar folds.
3832       // Don't do this if X is an immediate that can fold into a cmp
3833       // instruction and X+Y has other uses. It could be an induction variable
3834       // chain, and the transform would increase register pressure.
3835       if (!LegalRHSImm || N0.hasOneUse())
3836         if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
3837           return V;
3838     }
3839
3840     if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
3841         N1.getOpcode() == ISD::XOR)
3842       if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
3843         return V;
3844
3845     if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
3846       return V;
3847   }
3848
3849   // Fold remainder of division by a constant.
3850   if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
3851       N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
3852     AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3853
3854     // When division is cheap or optimizing for minimum size,
3855     // fall through to DIVREM creation by skipping this fold.
3856     if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
3857       if (N0.getOpcode() == ISD::UREM) {
3858         if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
3859           return Folded;
3860       } else if (N0.getOpcode() == ISD::SREM) {
3861         if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
3862           return Folded;
3863       }
3864     }
3865   }
3866
3867   // Fold away ALL boolean setcc's.
3868   if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
3869     SDValue Temp;
3870     switch (Cond) {
3871     default: llvm_unreachable("Unknown integer setcc!");
3872     case ISD::SETEQ:  // X == Y  -> ~(X^Y)
3873       Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3874       N0 = DAG.getNOT(dl, Temp, OpVT);
3875       if (!DCI.isCalledByLegalizer())
3876         DCI.AddToWorklist(Temp.getNode());
3877       break;
3878     case ISD::SETNE:  // X != Y   -->  (X^Y)
3879       N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
3880       break;
3881     case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
3882     case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
3883       Temp = DAG.getNOT(dl, N0, OpVT);
3884       N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
3885       if (!DCI.isCalledByLegalizer())
3886         DCI.AddToWorklist(Temp.getNode());
3887       break;
3888     case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
3889     case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
3890       Temp = DAG.getNOT(dl, N1, OpVT);
3891       N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
3892       if (!DCI.isCalledByLegalizer())
3893         DCI.AddToWorklist(Temp.getNode());
3894       break;
3895     case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
3896     case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
3897       Temp = DAG.getNOT(dl, N0, OpVT);
3898       N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
3899       if (!DCI.isCalledByLegalizer())
3900         DCI.AddToWorklist(Temp.getNode());
3901       break;
3902     case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
3903     case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
3904       Temp = DAG.getNOT(dl, N1, OpVT);
3905       N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
3906       break;
3907     }
3908     if (VT.getScalarType() != MVT::i1) {
3909       if (!DCI.isCalledByLegalizer())
3910         DCI.AddToWorklist(N0.getNode());
3911       // FIXME: If running after legalize, we probably can't do this.
3912       ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
3913       N0 = DAG.getNode(ExtendCode, dl, VT, N0);
3914     }
3915     return N0;
3916   }
3917
3918   // Could not fold it.
3919   return SDValue();
3920 }
3921
3922 /// Returns true (and the GlobalValue and the offset) if the node is a
3923 /// GlobalAddress + offset.
3924 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
3925                                     int64_t &Offset) const {
3926
3927   SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
3928
3929   if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
3930     GA = GASD->getGlobal();
3931     Offset += GASD->getOffset();
3932     return true;
3933   }
3934
3935   if (N->getOpcode() == ISD::ADD) {
3936     SDValue N1 = N->getOperand(0);
3937     SDValue N2 = N->getOperand(1);
3938     if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
3939       if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
3940         Offset += V->getSExtValue();
3941         return true;
3942       }
3943     } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
3944       if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
3945         Offset += V->getSExtValue();
3946         return true;
3947       }
3948     }
3949   }
3950
3951   return false;
3952 }
3953
3954 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
3955                                           DAGCombinerInfo &DCI) const {
3956   // Default implementation: no optimization.
3957   return SDValue();
3958 }
3959
3960 //===----------------------------------------------------------------------===//
3961 //  Inline Assembler Implementation Methods
3962 //===----------------------------------------------------------------------===//
3963
3964 TargetLowering::ConstraintType
3965 TargetLowering::getConstraintType(StringRef Constraint) const {
3966   unsigned S = Constraint.size();
3967
3968   if (S == 1) {
3969     switch (Constraint[0]) {
3970     default: break;
3971     case 'r':
3972       return C_RegisterClass;
3973     case 'm': // memory
3974     case 'o': // offsetable
3975     case 'V': // not offsetable
3976       return C_Memory;
3977     case 'n': // Simple Integer
3978     case 'E': // Floating Point Constant
3979     case 'F': // Floating Point Constant
3980       return C_Immediate;
3981     case 'i': // Simple Integer or Relocatable Constant
3982     case 's': // Relocatable Constant
3983     case 'p': // Address.
3984     case 'X': // Allow ANY value.
3985     case 'I': // Target registers.
3986     case 'J':
3987     case 'K':
3988     case 'L':
3989     case 'M':
3990     case 'N':
3991     case 'O':
3992     case 'P':
3993     case '<':
3994     case '>':
3995       return C_Other;
3996     }
3997   }
3998
3999   if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
4000     if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
4001       return C_Memory;
4002     return C_Register;
4003   }
4004   return C_Unknown;
4005 }
4006
4007 /// Try to replace an X constraint, which matches anything, with another that
4008 /// has more specific requirements based on the type of the corresponding
4009 /// operand.
4010 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
4011   if (ConstraintVT.isInteger())
4012     return "r";
4013   if (ConstraintVT.isFloatingPoint())
4014     return "f"; // works for many targets
4015   return nullptr;
4016 }
4017
4018 SDValue TargetLowering::LowerAsmOutputForConstraint(
4019     SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
4020     SelectionDAG &DAG) const {
4021   return SDValue();
4022 }
4023
4024 /// Lower the specified operand into the Ops vector.
4025 /// If it is invalid, don't add anything to Ops.
4026 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
4027                                                   std::string &Constraint,
4028                                                   std::vector<SDValue> &Ops,
4029                                                   SelectionDAG &DAG) const {
4030
4031   if (Constraint.length() > 1) return;
4032
4033   char ConstraintLetter = Constraint[0];
4034   switch (ConstraintLetter) {
4035   default: break;
4036   case 'X':     // Allows any operand; labels (basic block) use this.
4037     if (Op.getOpcode() == ISD::BasicBlock ||
4038         Op.getOpcode() == ISD::TargetBlockAddress) {
4039       Ops.push_back(Op);
4040       return;
4041     }
4042     LLVM_FALLTHROUGH;
4043   case 'i':    // Simple Integer or Relocatable Constant
4044   case 'n':    // Simple Integer
4045   case 's': {  // Relocatable Constant
4046
4047     GlobalAddressSDNode *GA;
4048     ConstantSDNode *C;
4049     BlockAddressSDNode *BA;
4050     uint64_t Offset = 0;
4051
4052     // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
4053     // etc., since getelementpointer is variadic. We can't use
4054     // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
4055     // while in this case the GA may be furthest from the root node which is
4056     // likely an ISD::ADD.
4057     while (1) {
4058       if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
4059         Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
4060                                                  GA->getValueType(0),
4061                                                  Offset + GA->getOffset()));
4062         return;
4063       } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
4064                  ConstraintLetter != 's') {
4065         // gcc prints these as sign extended.  Sign extend value to 64 bits
4066         // now; without this it would get ZExt'd later in
4067         // ScheduleDAGSDNodes::EmitNode, which is very generic.
4068         bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
4069         BooleanContent BCont = getBooleanContents(MVT::i64);
4070         ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
4071                                       : ISD::SIGN_EXTEND;
4072         int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
4073                                                     : C->getSExtValue();
4074         Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
4075                                             SDLoc(C), MVT::i64));
4076         return;
4077       } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
4078                  ConstraintLetter != 'n') {
4079         Ops.push_back(DAG.getTargetBlockAddress(
4080             BA->getBlockAddress(), BA->getValueType(0),
4081             Offset + BA->getOffset(), BA->getTargetFlags()));
4082         return;
4083       } else {
4084         const unsigned OpCode = Op.getOpcode();
4085         if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
4086           if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
4087             Op = Op.getOperand(1);
4088           // Subtraction is not commutative.
4089           else if (OpCode == ISD::ADD &&
4090                    (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
4091             Op = Op.getOperand(0);
4092           else
4093             return;
4094           Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
4095           continue;
4096         }
4097       }
4098       return;
4099     }
4100     break;
4101   }
4102   }
4103 }
4104
4105 std::pair<unsigned, const TargetRegisterClass *>
4106 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
4107                                              StringRef Constraint,
4108                                              MVT VT) const {
4109   if (Constraint.empty() || Constraint[0] != '{')
4110     return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
4111   assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
4112
4113   // Remove the braces from around the name.
4114   StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
4115
4116   std::pair<unsigned, const TargetRegisterClass *> R =
4117       std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
4118
4119   // Figure out which register class contains this reg.
4120   for (const TargetRegisterClass *RC : RI->regclasses()) {
4121     // If none of the value types for this register class are valid, we
4122     // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4123     if (!isLegalRC(*RI, *RC))
4124       continue;
4125
4126     for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
4127          I != E; ++I) {
4128       if (RegName.equals_lower(RI->getRegAsmName(*I))) {
4129         std::pair<unsigned, const TargetRegisterClass *> S =
4130             std::make_pair(*I, RC);
4131
4132         // If this register class has the requested value type, return it,
4133         // otherwise keep searching and return the first class found
4134         // if no other is found which explicitly has the requested type.
4135         if (RI->isTypeLegalForClass(*RC, VT))
4136           return S;
4137         if (!R.second)
4138           R = S;
4139       }
4140     }
4141   }
4142
4143   return R;
4144 }
4145
4146 //===----------------------------------------------------------------------===//
4147 // Constraint Selection.
4148
4149 /// Return true of this is an input operand that is a matching constraint like
4150 /// "4".
4151 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
4152   assert(!ConstraintCode.empty() && "No known constraint!");
4153   return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
4154 }
4155
4156 /// If this is an input matching constraint, this method returns the output
4157 /// operand it matches.
4158 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
4159   assert(!ConstraintCode.empty() && "No known constraint!");
4160   return atoi(ConstraintCode.c_str());
4161 }
4162
4163 /// Split up the constraint string from the inline assembly value into the
4164 /// specific constraints and their prefixes, and also tie in the associated
4165 /// operand values.
4166 /// If this returns an empty vector, and if the constraint string itself
4167 /// isn't empty, there was an error parsing.
4168 TargetLowering::AsmOperandInfoVector
4169 TargetLowering::ParseConstraints(const DataLayout &DL,
4170                                  const TargetRegisterInfo *TRI,
4171                                  ImmutableCallSite CS) const {
4172   /// Information about all of the constraints.
4173   AsmOperandInfoVector ConstraintOperands;
4174   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4175   unsigned maCount = 0; // Largest number of multiple alternative constraints.
4176
4177   // Do a prepass over the constraints, canonicalizing them, and building up the
4178   // ConstraintOperands list.
4179   unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
4180   unsigned ResNo = 0; // ResNo - The result number of the next output.
4181
4182   for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
4183     ConstraintOperands.emplace_back(std::move(CI));
4184     AsmOperandInfo &OpInfo = ConstraintOperands.back();
4185
4186     // Update multiple alternative constraint count.
4187     if (OpInfo.multipleAlternatives.size() > maCount)
4188       maCount = OpInfo.multipleAlternatives.size();
4189
4190     OpInfo.ConstraintVT = MVT::Other;
4191
4192     // Compute the value type for each operand.
4193     switch (OpInfo.Type) {
4194     case InlineAsm::isOutput:
4195       // Indirect outputs just consume an argument.
4196       if (OpInfo.isIndirect) {
4197         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4198         break;
4199       }
4200
4201       // The return value of the call is this value.  As such, there is no
4202       // corresponding argument.
4203       assert(!CS.getType()->isVoidTy() &&
4204              "Bad inline asm!");
4205       if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
4206         OpInfo.ConstraintVT =
4207             getSimpleValueType(DL, STy->getElementType(ResNo));
4208       } else {
4209         assert(ResNo == 0 && "Asm only has one result!");
4210         OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
4211       }
4212       ++ResNo;
4213       break;
4214     case InlineAsm::isInput:
4215       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
4216       break;
4217     case InlineAsm::isClobber:
4218       // Nothing to do.
4219       break;
4220     }
4221
4222     if (OpInfo.CallOperandVal) {
4223       llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
4224       if (OpInfo.isIndirect) {
4225         llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4226         if (!PtrTy)
4227           report_fatal_error("Indirect operand for inline asm not a pointer!");
4228         OpTy = PtrTy->getElementType();
4229       }
4230
4231       // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
4232       if (StructType *STy = dyn_cast<StructType>(OpTy))
4233         if (STy->getNumElements() == 1)
4234           OpTy = STy->getElementType(0);
4235
4236       // If OpTy is not a single value, it may be a struct/union that we
4237       // can tile with integers.
4238       if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4239         unsigned BitSize = DL.getTypeSizeInBits(OpTy);
4240         switch (BitSize) {
4241         default: break;
4242         case 1:
4243         case 8:
4244         case 16:
4245         case 32:
4246         case 64:
4247         case 128:
4248           OpInfo.ConstraintVT =
4249               MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
4250           break;
4251         }
4252       } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
4253         unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
4254         OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
4255       } else {
4256         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
4257       }
4258     }
4259   }
4260
4261   // If we have multiple alternative constraints, select the best alternative.
4262   if (!ConstraintOperands.empty()) {
4263     if (maCount) {
4264       unsigned bestMAIndex = 0;
4265       int bestWeight = -1;
4266       // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
4267       int weight = -1;
4268       unsigned maIndex;
4269       // Compute the sums of the weights for each alternative, keeping track
4270       // of the best (highest weight) one so far.
4271       for (maIndex = 0; maIndex < maCount; ++maIndex) {
4272         int weightSum = 0;
4273         for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4274              cIndex != eIndex; ++cIndex) {
4275           AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4276           if (OpInfo.Type == InlineAsm::isClobber)
4277             continue;
4278
4279           // If this is an output operand with a matching input operand,
4280           // look up the matching input. If their types mismatch, e.g. one
4281           // is an integer, the other is floating point, or their sizes are
4282           // different, flag it as an maCantMatch.
4283           if (OpInfo.hasMatchingInput()) {
4284             AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4285             if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4286               if ((OpInfo.ConstraintVT.isInteger() !=
4287                    Input.ConstraintVT.isInteger()) ||
4288                   (OpInfo.ConstraintVT.getSizeInBits() !=
4289                    Input.ConstraintVT.getSizeInBits())) {
4290                 weightSum = -1; // Can't match.
4291                 break;
4292               }
4293             }
4294           }
4295           weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
4296           if (weight == -1) {
4297             weightSum = -1;
4298             break;
4299           }
4300           weightSum += weight;
4301         }
4302         // Update best.
4303         if (weightSum > bestWeight) {
4304           bestWeight = weightSum;
4305           bestMAIndex = maIndex;
4306         }
4307       }
4308
4309       // Now select chosen alternative in each constraint.
4310       for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4311            cIndex != eIndex; ++cIndex) {
4312         AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
4313         if (cInfo.Type == InlineAsm::isClobber)
4314           continue;
4315         cInfo.selectAlternative(bestMAIndex);
4316       }
4317     }
4318   }
4319
4320   // Check and hook up tied operands, choose constraint code to use.
4321   for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
4322        cIndex != eIndex; ++cIndex) {
4323     AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
4324
4325     // If this is an output operand with a matching input operand, look up the
4326     // matching input. If their types mismatch, e.g. one is an integer, the
4327     // other is floating point, or their sizes are different, flag it as an
4328     // error.
4329     if (OpInfo.hasMatchingInput()) {
4330       AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4331
4332       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4333         std::pair<unsigned, const TargetRegisterClass *> MatchRC =
4334             getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
4335                                          OpInfo.ConstraintVT);
4336         std::pair<unsigned, const TargetRegisterClass *> InputRC =
4337             getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
4338                                          Input.ConstraintVT);
4339         if ((OpInfo.ConstraintVT.isInteger() !=
4340              Input.ConstraintVT.isInteger()) ||
4341             (MatchRC.second != InputRC.second)) {
4342           report_fatal_error("Unsupported asm: input constraint"
4343                              " with a matching output constraint of"
4344                              " incompatible type!");
4345         }
4346       }
4347     }
4348   }
4349
4350   return ConstraintOperands;
4351 }
4352
4353 /// Return an integer indicating how general CT is.
4354 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
4355   switch (CT) {
4356   case TargetLowering::C_Immediate:
4357   case TargetLowering::C_Other:
4358   case TargetLowering::C_Unknown:
4359     return 0;
4360   case TargetLowering::C_Register:
4361     return 1;
4362   case TargetLowering::C_RegisterClass:
4363     return 2;
4364   case TargetLowering::C_Memory:
4365     return 3;
4366   }
4367   llvm_unreachable("Invalid constraint type");
4368 }
4369
4370 /// Examine constraint type and operand type and determine a weight value.
4371 /// This object must already have been set up with the operand type
4372 /// and the current alternative constraint selected.
4373 TargetLowering::ConstraintWeight
4374   TargetLowering::getMultipleConstraintMatchWeight(
4375     AsmOperandInfo &info, int maIndex) const {
4376   InlineAsm::ConstraintCodeVector *rCodes;
4377   if (maIndex >= (int)info.multipleAlternatives.size())
4378     rCodes = &info.Codes;
4379   else
4380     rCodes = &info.multipleAlternatives[maIndex].Codes;
4381   ConstraintWeight BestWeight = CW_Invalid;
4382
4383   // Loop over the options, keeping track of the most general one.
4384   for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
4385     ConstraintWeight weight =
4386       getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
4387     if (weight > BestWeight)
4388       BestWeight = weight;
4389   }
4390
4391   return BestWeight;
4392 }
4393
4394 /// Examine constraint type and operand type and determine a weight value.
4395 /// This object must already have been set up with the operand type
4396 /// and the current alternative constraint selected.
4397 TargetLowering::ConstraintWeight
4398   TargetLowering::getSingleConstraintMatchWeight(
4399     AsmOperandInfo &info, const char *constraint) const {
4400   ConstraintWeight weight = CW_Invalid;
4401   Value *CallOperandVal = info.CallOperandVal;
4402     // If we don't have a value, we can't do a match,
4403     // but allow it at the lowest weight.
4404   if (!CallOperandVal)
4405     return CW_Default;
4406   // Look at the constraint type.
4407   switch (*constraint) {
4408     case 'i': // immediate integer.
4409     case 'n': // immediate integer with a known value.
4410       if (isa<ConstantInt>(CallOperandVal))
4411         weight = CW_Constant;
4412       break;
4413     case 's': // non-explicit intregal immediate.
4414       if (isa<GlobalValue>(CallOperandVal))
4415         weight = CW_Constant;
4416       break;
4417     case 'E': // immediate float if host format.
4418     case 'F': // immediate float.
4419       if (isa<ConstantFP>(CallOperandVal))
4420         weight = CW_Constant;
4421       break;
4422     case '<': // memory operand with autodecrement.
4423     case '>': // memory operand with autoincrement.
4424     case 'm': // memory operand.
4425     case 'o': // offsettable memory operand
4426     case 'V': // non-offsettable memory operand
4427       weight = CW_Memory;
4428       break;
4429     case 'r': // general register.
4430     case 'g': // general register, memory operand or immediate integer.
4431               // note: Clang converts "g" to "imr".
4432       if (CallOperandVal->getType()->isIntegerTy())
4433         weight = CW_Register;
4434       break;
4435     case 'X': // any operand.
4436   default:
4437     weight = CW_Default;
4438     break;
4439   }
4440   return weight;
4441 }
4442
4443 /// If there are multiple different constraints that we could pick for this
4444 /// operand (e.g. "imr") try to pick the 'best' one.
4445 /// This is somewhat tricky: constraints fall into four classes:
4446 ///    Other         -> immediates and magic values
4447 ///    Register      -> one specific register
4448 ///    RegisterClass -> a group of regs
4449 ///    Memory        -> memory
4450 /// Ideally, we would pick the most specific constraint possible: if we have
4451 /// something that fits into a register, we would pick it.  The problem here
4452 /// is that if we have something that could either be in a register or in
4453 /// memory that use of the register could cause selection of *other*
4454 /// operands to fail: they might only succeed if we pick memory.  Because of
4455 /// this the heuristic we use is:
4456 ///
4457 ///  1) If there is an 'other' constraint, and if the operand is valid for
4458 ///     that constraint, use it.  This makes us take advantage of 'i'
4459 ///     constraints when available.
4460 ///  2) Otherwise, pick the most general constraint present.  This prefers
4461 ///     'm' over 'r', for example.
4462 ///
4463 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
4464                              const TargetLowering &TLI,
4465                              SDValue Op, SelectionDAG *DAG) {
4466   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
4467   unsigned BestIdx = 0;
4468   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
4469   int BestGenerality = -1;
4470
4471   // Loop over the options, keeping track of the most general one.
4472   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
4473     TargetLowering::ConstraintType CType =
4474       TLI.getConstraintType(OpInfo.Codes[i]);
4475
4476     // If this is an 'other' or 'immediate' constraint, see if the operand is
4477     // valid for it. For example, on X86 we might have an 'rI' constraint. If
4478     // the operand is an integer in the range [0..31] we want to use I (saving a
4479     // load of a register), otherwise we must use 'r'.
4480     if ((CType == TargetLowering::C_Other ||
4481          CType == TargetLowering::C_Immediate) && Op.getNode()) {
4482       assert(OpInfo.Codes[i].size() == 1 &&
4483              "Unhandled multi-letter 'other' constraint");
4484       std::vector<SDValue> ResultOps;
4485       TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
4486                                        ResultOps, *DAG);
4487       if (!ResultOps.empty()) {
4488         BestType = CType;
4489         BestIdx = i;
4490         break;
4491       }
4492     }
4493
4494     // Things with matching constraints can only be registers, per gcc
4495     // documentation.  This mainly affects "g" constraints.
4496     if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
4497       continue;
4498
4499     // This constraint letter is more general than the previous one, use it.
4500     int Generality = getConstraintGenerality(CType);
4501     if (Generality > BestGenerality) {
4502       BestType = CType;
4503       BestIdx = i;
4504       BestGenerality = Generality;
4505     }
4506   }
4507
4508   OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
4509   OpInfo.ConstraintType = BestType;
4510 }
4511
4512 /// Determines the constraint code and constraint type to use for the specific
4513 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4514 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4515                                             SDValue Op,
4516                                             SelectionDAG *DAG) const {
4517   assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
4518
4519   // Single-letter constraints ('r') are very common.
4520   if (OpInfo.Codes.size() == 1) {
4521     OpInfo.ConstraintCode = OpInfo.Codes[0];
4522     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4523   } else {
4524     ChooseConstraint(OpInfo, *this, Op, DAG);
4525   }
4526
4527   // 'X' matches anything.
4528   if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
4529     // Labels and constants are handled elsewhere ('X' is the only thing
4530     // that matches labels).  For Functions, the type here is the type of
4531     // the result, which is not what we want to look at; leave them alone.
4532     Value *v = OpInfo.CallOperandVal;
4533     if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
4534       OpInfo.CallOperandVal = v;
4535       return;
4536     }
4537
4538     if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
4539       return;
4540
4541     // Otherwise, try to resolve it to something we know about by looking at
4542     // the actual operand type.
4543     if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
4544       OpInfo.ConstraintCode = Repl;
4545       OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
4546     }
4547   }
4548 }
4549
4550 /// Given an exact SDIV by a constant, create a multiplication
4551 /// with the multiplicative inverse of the constant.
4552 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
4553                               const SDLoc &dl, SelectionDAG &DAG,
4554                               SmallVectorImpl<SDNode *> &Created) {
4555   SDValue Op0 = N->getOperand(0);
4556   SDValue Op1 = N->getOperand(1);
4557   EVT VT = N->getValueType(0);
4558   EVT SVT = VT.getScalarType();
4559   EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
4560   EVT ShSVT = ShVT.getScalarType();
4561
4562   bool UseSRA = false;
4563   SmallVector<SDValue, 16> Shifts, Factors;
4564
4565   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4566     if (C->isNullValue())
4567       return false;
4568     APInt Divisor = C->getAPIntValue();
4569     unsigned Shift = Divisor.countTrailingZeros();
4570     if (Shift) {
4571       Divisor.ashrInPlace(Shift);
4572       UseSRA = true;
4573     }
4574     // Calculate the multiplicative inverse, using Newton's method.
4575     APInt t;
4576     APInt Factor = Divisor;
4577     while ((t = Divisor * Factor) != 1)
4578       Factor *= APInt(Divisor.getBitWidth(), 2) - t;
4579     Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
4580     Factors.push_back(DAG.getConstant(Factor, dl, SVT));
4581     return true;
4582   };
4583
4584   // Collect all magic values from the build vector.
4585   if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
4586     return SDValue();
4587
4588   SDValue Shift, Factor;
4589   if (VT.isVector()) {
4590     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4591     Factor = DAG.getBuildVector(VT, dl, Factors);
4592   } else {
4593     Shift = Shifts[0];
4594     Factor = Factors[0];
4595   }
4596
4597   SDValue Res = Op0;
4598
4599   // Shift the value upfront if it is even, so the LSB is one.
4600   if (UseSRA) {
4601     // TODO: For UDIV use SRL instead of SRA.
4602     SDNodeFlags Flags;
4603     Flags.setExact(true);
4604     Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
4605     Created.push_back(Res.getNode());
4606   }
4607
4608   return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
4609 }
4610
4611 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4612                               SelectionDAG &DAG,
4613                               SmallVectorImpl<SDNode *> &Created) const {
4614   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4615   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4616   if (TLI.isIntDivCheap(N->getValueType(0), Attr))
4617     return SDValue(N, 0); // Lower SDIV as SDIV
4618   return SDValue();
4619 }
4620
4621 /// Given an ISD::SDIV node expressing a divide by constant,
4622 /// return a DAG expression to select that will generate the same value by
4623 /// multiplying by a magic number.
4624 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4625 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
4626                                   bool IsAfterLegalization,
4627                                   SmallVectorImpl<SDNode *> &Created) const {
4628   SDLoc dl(N);
4629   EVT VT = N->getValueType(0);
4630   EVT SVT = VT.getScalarType();
4631   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4632   EVT ShSVT = ShVT.getScalarType();
4633   unsigned EltBits = VT.getScalarSizeInBits();
4634
4635   // Check to see if we can do this.
4636   // FIXME: We should be more aggressive here.
4637   if (!isTypeLegal(VT))
4638     return SDValue();
4639
4640   // If the sdiv has an 'exact' bit we can use a simpler lowering.
4641   if (N->getFlags().hasExact())
4642     return BuildExactSDIV(*this, N, dl, DAG, Created);
4643
4644   SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
4645
4646   auto BuildSDIVPattern = [&](ConstantSDNode *C) {
4647     if (C->isNullValue())
4648       return false;
4649
4650     const APInt &Divisor = C->getAPIntValue();
4651     APInt::ms magics = Divisor.magic();
4652     int NumeratorFactor = 0;
4653     int ShiftMask = -1;
4654
4655     if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
4656       // If d is +1/-1, we just multiply the numerator by +1/-1.
4657       NumeratorFactor = Divisor.getSExtValue();
4658       magics.m = 0;
4659       magics.s = 0;
4660       ShiftMask = 0;
4661     } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
4662       // If d > 0 and m < 0, add the numerator.
4663       NumeratorFactor = 1;
4664     } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
4665       // If d < 0 and m > 0, subtract the numerator.
4666       NumeratorFactor = -1;
4667     }
4668
4669     MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
4670     Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
4671     Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
4672     ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
4673     return true;
4674   };
4675
4676   SDValue N0 = N->getOperand(0);
4677   SDValue N1 = N->getOperand(1);
4678
4679   // Collect the shifts / magic values from each element.
4680   if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
4681     return SDValue();
4682
4683   SDValue MagicFactor, Factor, Shift, ShiftMask;
4684   if (VT.isVector()) {
4685     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4686     Factor = DAG.getBuildVector(VT, dl, Factors);
4687     Shift = DAG.getBuildVector(ShVT, dl, Shifts);
4688     ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
4689   } else {
4690     MagicFactor = MagicFactors[0];
4691     Factor = Factors[0];
4692     Shift = Shifts[0];
4693     ShiftMask = ShiftMasks[0];
4694   }
4695
4696   // Multiply the numerator (operand 0) by the magic value.
4697   // FIXME: We should support doing a MUL in a wider type.
4698   SDValue Q;
4699   if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
4700                           : isOperationLegalOrCustom(ISD::MULHS, VT))
4701     Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
4702   else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
4703                                : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
4704     SDValue LoHi =
4705         DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
4706     Q = SDValue(LoHi.getNode(), 1);
4707   } else
4708     return SDValue(); // No mulhs or equivalent.
4709   Created.push_back(Q.getNode());
4710
4711   // (Optionally) Add/subtract the numerator using Factor.
4712   Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
4713   Created.push_back(Factor.getNode());
4714   Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
4715   Created.push_back(Q.getNode());
4716
4717   // Shift right algebraic by shift value.
4718   Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
4719   Created.push_back(Q.getNode());
4720
4721   // Extract the sign bit, mask it and add it to the quotient.
4722   SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
4723   SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
4724   Created.push_back(T.getNode());
4725   T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
4726   Created.push_back(T.getNode());
4727   return DAG.getNode(ISD::ADD, dl, VT, Q, T);
4728 }
4729
4730 /// Given an ISD::UDIV node expressing a divide by constant,
4731 /// return a DAG expression to select that will generate the same value by
4732 /// multiplying by a magic number.
4733 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
4734 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
4735                                   bool IsAfterLegalization,
4736                                   SmallVectorImpl<SDNode *> &Created) const {
4737   SDLoc dl(N);
4738   EVT VT = N->getValueType(0);
4739   EVT SVT = VT.getScalarType();
4740   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4741   EVT ShSVT = ShVT.getScalarType();
4742   unsigned EltBits = VT.getScalarSizeInBits();
4743
4744   // Check to see if we can do this.
4745   // FIXME: We should be more aggressive here.
4746   if (!isTypeLegal(VT))
4747     return SDValue();
4748
4749   bool UseNPQ = false;
4750   SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
4751
4752   auto BuildUDIVPattern = [&](ConstantSDNode *C) {
4753     if (C->isNullValue())
4754       return false;
4755     // FIXME: We should use a narrower constant when the upper
4756     // bits are known to be zero.
4757     APInt Divisor = C->getAPIntValue();
4758     APInt::mu magics = Divisor.magicu();
4759     unsigned PreShift = 0, PostShift = 0;
4760
4761     // If the divisor is even, we can avoid using the expensive fixup by
4762     // shifting the divided value upfront.
4763     if (magics.a != 0 && !Divisor[0]) {
4764       PreShift = Divisor.countTrailingZeros();
4765       // Get magic number for the shifted divisor.
4766       magics = Divisor.lshr(PreShift).magicu(PreShift);
4767       assert(magics.a == 0 && "Should use cheap fixup now");
4768     }
4769
4770     APInt Magic = magics.m;
4771
4772     unsigned SelNPQ;
4773     if (magics.a == 0 || Divisor.isOneValue()) {
4774       assert(magics.s < Divisor.getBitWidth() &&
4775              "We shouldn't generate an undefined shift!");
4776       PostShift = magics.s;
4777       SelNPQ = false;
4778     } else {
4779       PostShift = magics.s - 1;
4780       SelNPQ = true;
4781     }
4782
4783     PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
4784     MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
4785     NPQFactors.push_back(
4786         DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
4787                                : APInt::getNullValue(EltBits),
4788                         dl, SVT));
4789     PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
4790     UseNPQ |= SelNPQ;
4791     return true;
4792   };
4793
4794   SDValue N0 = N->getOperand(0);
4795   SDValue N1 = N->getOperand(1);
4796
4797   // Collect the shifts/magic values from each element.
4798   if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
4799     return SDValue();
4800
4801   SDValue PreShift, PostShift, MagicFactor, NPQFactor;
4802   if (VT.isVector()) {
4803     PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
4804     MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
4805     NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
4806     PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
4807   } else {
4808     PreShift = PreShifts[0];
4809     MagicFactor = MagicFactors[0];
4810     PostShift = PostShifts[0];
4811   }
4812
4813   SDValue Q = N0;
4814   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
4815   Created.push_back(Q.getNode());
4816
4817   // FIXME: We should support doing a MUL in a wider type.
4818   auto GetMULHU = [&](SDValue X, SDValue Y) {
4819     if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
4820                             : isOperationLegalOrCustom(ISD::MULHU, VT))
4821       return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
4822     if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
4823                             : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
4824       SDValue LoHi =
4825           DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
4826       return SDValue(LoHi.getNode(), 1);
4827     }
4828     return SDValue(); // No mulhu or equivalent
4829   };
4830
4831   // Multiply the numerator (operand 0) by the magic value.
4832   Q = GetMULHU(Q, MagicFactor);
4833   if (!Q)
4834     return SDValue();
4835
4836   Created.push_back(Q.getNode());
4837
4838   if (UseNPQ) {
4839     SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
4840     Created.push_back(NPQ.getNode());
4841
4842     // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4843     // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
4844     if (VT.isVector())
4845       NPQ = GetMULHU(NPQ, NPQFactor);
4846     else
4847       NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
4848
4849     Created.push_back(NPQ.getNode());
4850
4851     Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
4852     Created.push_back(Q.getNode());
4853   }
4854
4855   Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
4856   Created.push_back(Q.getNode());
4857
4858   SDValue One = DAG.getConstant(1, dl, VT);
4859   SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
4860   return DAG.getSelect(dl, VT, IsOne, N0, Q);
4861 }
4862
4863 /// If all values in Values that *don't* match the predicate are same 'splat'
4864 /// value, then replace all values with that splat value.
4865 /// Else, if AlternativeReplacement was provided, then replace all values that
4866 /// do match predicate with AlternativeReplacement value.
4867 static void
4868 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
4869                           std::function<bool(SDValue)> Predicate,
4870                           SDValue AlternativeReplacement = SDValue()) {
4871   SDValue Replacement;
4872   // Is there a value for which the Predicate does *NOT* match? What is it?
4873   auto SplatValue = llvm::find_if_not(Values, Predicate);
4874   if (SplatValue != Values.end()) {
4875     // Does Values consist only of SplatValue's and values matching Predicate?
4876     if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
4877           return Value == *SplatValue || Predicate(Value);
4878         })) // Then we shall replace values matching predicate with SplatValue.
4879       Replacement = *SplatValue;
4880   }
4881   if (!Replacement) {
4882     // Oops, we did not find the "baseline" splat value.
4883     if (!AlternativeReplacement)
4884       return; // Nothing to do.
4885     // Let's replace with provided value then.
4886     Replacement = AlternativeReplacement;
4887   }
4888   std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
4889 }
4890
4891 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
4892 /// where the divisor is constant and the comparison target is zero,
4893 /// return a DAG expression that will generate the same comparison result
4894 /// using only multiplications, additions and shifts/rotations.
4895 /// Ref: "Hacker's Delight" 10-17.
4896 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
4897                                         SDValue CompTargetNode,
4898                                         ISD::CondCode Cond,
4899                                         DAGCombinerInfo &DCI,
4900                                         const SDLoc &DL) const {
4901   SmallVector<SDNode *, 2> Built;
4902   if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
4903                                          DCI, DL, Built)) {
4904     for (SDNode *N : Built)
4905       DCI.AddToWorklist(N);
4906     return Folded;
4907   }
4908
4909   return SDValue();
4910 }
4911
4912 SDValue
4913 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4914                                   SDValue CompTargetNode, ISD::CondCode Cond,
4915                                   DAGCombinerInfo &DCI, const SDLoc &DL,
4916                                   SmallVectorImpl<SDNode *> &Created) const {
4917   // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
4918   // - D must be constant, with D = D0 * 2^K where D0 is odd
4919   // - P is the multiplicative inverse of D0 modulo 2^W
4920   // - Q = floor(((2^W) - 1) / D)
4921   // where W is the width of the common type of N and D.
4922   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4923          "Only applicable for (in)equality comparisons.");
4924
4925   SelectionDAG &DAG = DCI.DAG;
4926
4927   EVT VT = REMNode.getValueType();
4928   EVT SVT = VT.getScalarType();
4929   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
4930   EVT ShSVT = ShVT.getScalarType();
4931
4932   // If MUL is unavailable, we cannot proceed in any case.
4933   if (!isOperationLegalOrCustom(ISD::MUL, VT))
4934     return SDValue();
4935
4936   // TODO: Could support comparing with non-zero too.
4937   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
4938   if (!CompTarget || !CompTarget->isNullValue())
4939     return SDValue();
4940
4941   bool HadOneDivisor = false;
4942   bool AllDivisorsAreOnes = true;
4943   bool HadEvenDivisor = false;
4944   bool AllDivisorsArePowerOfTwo = true;
4945   SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
4946
4947   auto BuildUREMPattern = [&](ConstantSDNode *C) {
4948     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
4949     if (C->isNullValue())
4950       return false;
4951
4952     const APInt &D = C->getAPIntValue();
4953     // If all divisors are ones, we will prefer to avoid the fold.
4954     HadOneDivisor |= D.isOneValue();
4955     AllDivisorsAreOnes &= D.isOneValue();
4956
4957     // Decompose D into D0 * 2^K
4958     unsigned K = D.countTrailingZeros();
4959     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
4960     APInt D0 = D.lshr(K);
4961
4962     // D is even if it has trailing zeros.
4963     HadEvenDivisor |= (K != 0);
4964     // D is a power-of-two if D0 is one.
4965     // If all divisors are power-of-two, we will prefer to avoid the fold.
4966     AllDivisorsArePowerOfTwo &= D0.isOneValue();
4967
4968     // P = inv(D0, 2^W)
4969     // 2^W requires W + 1 bits, so we have to extend and then truncate.
4970     unsigned W = D.getBitWidth();
4971     APInt P = D0.zext(W + 1)
4972                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
4973                   .trunc(W);
4974     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
4975     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
4976
4977     // Q = floor((2^W - 1) / D)
4978     APInt Q = APInt::getAllOnesValue(W).udiv(D);
4979
4980     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
4981            "We are expecting that K is always less than all-ones for ShSVT");
4982
4983     // If the divisor is 1 the result can be constant-folded.
4984     if (D.isOneValue()) {
4985       // Set P and K amount to a bogus values so we can try to splat them.
4986       P = 0;
4987       K = -1;
4988       assert(Q.isAllOnesValue() &&
4989              "Expecting all-ones comparison for one divisor");
4990     }
4991
4992     PAmts.push_back(DAG.getConstant(P, DL, SVT));
4993     KAmts.push_back(
4994         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
4995     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
4996     return true;
4997   };
4998
4999   SDValue N = REMNode.getOperand(0);
5000   SDValue D = REMNode.getOperand(1);
5001
5002   // Collect the values from each element.
5003   if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
5004     return SDValue();
5005
5006   // If this is a urem by a one, avoid the fold since it can be constant-folded.
5007   if (AllDivisorsAreOnes)
5008     return SDValue();
5009
5010   // If this is a urem by a powers-of-two, avoid the fold since it can be
5011   // best implemented as a bit test.
5012   if (AllDivisorsArePowerOfTwo)
5013     return SDValue();
5014
5015   SDValue PVal, KVal, QVal;
5016   if (VT.isVector()) {
5017     if (HadOneDivisor) {
5018       // Try to turn PAmts into a splat, since we don't care about the values
5019       // that are currently '0'. If we can't, just keep '0'`s.
5020       turnVectorIntoSplatVector(PAmts, isNullConstant);
5021       // Try to turn KAmts into a splat, since we don't care about the values
5022       // that are currently '-1'. If we can't, change them to '0'`s.
5023       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5024                                 DAG.getConstant(0, DL, ShSVT));
5025     }
5026
5027     PVal = DAG.getBuildVector(VT, DL, PAmts);
5028     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5029     QVal = DAG.getBuildVector(VT, DL, QAmts);
5030   } else {
5031     PVal = PAmts[0];
5032     KVal = KAmts[0];
5033     QVal = QAmts[0];
5034   }
5035
5036   // (mul N, P)
5037   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5038   Created.push_back(Op0.getNode());
5039
5040   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5041   // divisors as a performance improvement, since rotating by 0 is a no-op.
5042   if (HadEvenDivisor) {
5043     // We need ROTR to do this.
5044     if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5045       return SDValue();
5046     SDNodeFlags Flags;
5047     Flags.setExact(true);
5048     // UREM: (rotr (mul N, P), K)
5049     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5050     Created.push_back(Op0.getNode());
5051   }
5052
5053   // UREM: (setule/setugt (rotr (mul N, P), K), Q)
5054   return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5055                       ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5056 }
5057
5058 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
5059 /// where the divisor is constant and the comparison target is zero,
5060 /// return a DAG expression that will generate the same comparison result
5061 /// using only multiplications, additions and shifts/rotations.
5062 /// Ref: "Hacker's Delight" 10-17.
5063 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
5064                                         SDValue CompTargetNode,
5065                                         ISD::CondCode Cond,
5066                                         DAGCombinerInfo &DCI,
5067                                         const SDLoc &DL) const {
5068   SmallVector<SDNode *, 7> Built;
5069   if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
5070                                          DCI, DL, Built)) {
5071     assert(Built.size() <= 7 && "Max size prediction failed.");
5072     for (SDNode *N : Built)
5073       DCI.AddToWorklist(N);
5074     return Folded;
5075   }
5076
5077   return SDValue();
5078 }
5079
5080 SDValue
5081 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
5082                                   SDValue CompTargetNode, ISD::CondCode Cond,
5083                                   DAGCombinerInfo &DCI, const SDLoc &DL,
5084                                   SmallVectorImpl<SDNode *> &Created) const {
5085   // Fold:
5086   //   (seteq/ne (srem N, D), 0)
5087   // To:
5088   //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
5089   //
5090   // - D must be constant, with D = D0 * 2^K where D0 is odd
5091   // - P is the multiplicative inverse of D0 modulo 2^W
5092   // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
5093   // - Q = floor((2 * A) / (2^K))
5094   // where W is the width of the common type of N and D.
5095   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5096          "Only applicable for (in)equality comparisons.");
5097
5098   SelectionDAG &DAG = DCI.DAG;
5099
5100   EVT VT = REMNode.getValueType();
5101   EVT SVT = VT.getScalarType();
5102   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5103   EVT ShSVT = ShVT.getScalarType();
5104
5105   // If MUL is unavailable, we cannot proceed in any case.
5106   if (!isOperationLegalOrCustom(ISD::MUL, VT))
5107     return SDValue();
5108
5109   // TODO: Could support comparing with non-zero too.
5110   ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
5111   if (!CompTarget || !CompTarget->isNullValue())
5112     return SDValue();
5113
5114   bool HadIntMinDivisor = false;
5115   bool HadOneDivisor = false;
5116   bool AllDivisorsAreOnes = true;
5117   bool HadEvenDivisor = false;
5118   bool NeedToApplyOffset = false;
5119   bool AllDivisorsArePowerOfTwo = true;
5120   SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
5121
5122   auto BuildSREMPattern = [&](ConstantSDNode *C) {
5123     // Division by 0 is UB. Leave it to be constant-folded elsewhere.
5124     if (C->isNullValue())
5125       return false;
5126
5127     // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
5128
5129     // WARNING: this fold is only valid for positive divisors!
5130     APInt D = C->getAPIntValue();
5131     if (D.isNegative())
5132       D.negate(); //  `rem %X, -C` is equivalent to `rem %X, C`
5133
5134     HadIntMinDivisor |= D.isMinSignedValue();
5135
5136     // If all divisors are ones, we will prefer to avoid the fold.
5137     HadOneDivisor |= D.isOneValue();
5138     AllDivisorsAreOnes &= D.isOneValue();
5139
5140     // Decompose D into D0 * 2^K
5141     unsigned K = D.countTrailingZeros();
5142     assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
5143     APInt D0 = D.lshr(K);
5144
5145     if (!D.isMinSignedValue()) {
5146       // D is even if it has trailing zeros; unless it's INT_MIN, in which case
5147       // we don't care about this lane in this fold, we'll special-handle it.
5148       HadEvenDivisor |= (K != 0);
5149     }
5150
5151     // D is a power-of-two if D0 is one. This includes INT_MIN.
5152     // If all divisors are power-of-two, we will prefer to avoid the fold.
5153     AllDivisorsArePowerOfTwo &= D0.isOneValue();
5154
5155     // P = inv(D0, 2^W)
5156     // 2^W requires W + 1 bits, so we have to extend and then truncate.
5157     unsigned W = D.getBitWidth();
5158     APInt P = D0.zext(W + 1)
5159                   .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5160                   .trunc(W);
5161     assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
5162     assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
5163
5164     // A = floor((2^(W - 1) - 1) / D0) & -2^K
5165     APInt A = APInt::getSignedMaxValue(W).udiv(D0);
5166     A.clearLowBits(K);
5167
5168     if (!D.isMinSignedValue()) {
5169       // If divisor INT_MIN, then we don't care about this lane in this fold,
5170       // we'll special-handle it.
5171       NeedToApplyOffset |= A != 0;
5172     }
5173
5174     // Q = floor((2 * A) / (2^K))
5175     APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
5176
5177     assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
5178            "We are expecting that A is always less than all-ones for SVT");
5179     assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
5180            "We are expecting that K is always less than all-ones for ShSVT");
5181
5182     // If the divisor is 1 the result can be constant-folded. Likewise, we
5183     // don't care about INT_MIN lanes, those can be set to undef if appropriate.
5184     if (D.isOneValue()) {
5185       // Set P, A and K to a bogus values so we can try to splat them.
5186       P = 0;
5187       A = -1;
5188       K = -1;
5189
5190       // x ?% 1 == 0  <-->  true  <-->  x u<= -1
5191       Q = -1;
5192     }
5193
5194     PAmts.push_back(DAG.getConstant(P, DL, SVT));
5195     AAmts.push_back(DAG.getConstant(A, DL, SVT));
5196     KAmts.push_back(
5197         DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
5198     QAmts.push_back(DAG.getConstant(Q, DL, SVT));
5199     return true;
5200   };
5201
5202   SDValue N = REMNode.getOperand(0);
5203   SDValue D = REMNode.getOperand(1);
5204
5205   // Collect the values from each element.
5206   if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
5207     return SDValue();
5208
5209   // If this is a srem by a one, avoid the fold since it can be constant-folded.
5210   if (AllDivisorsAreOnes)
5211     return SDValue();
5212
5213   // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
5214   // since it can be best implemented as a bit test.
5215   if (AllDivisorsArePowerOfTwo)
5216     return SDValue();
5217
5218   SDValue PVal, AVal, KVal, QVal;
5219   if (VT.isVector()) {
5220     if (HadOneDivisor) {
5221       // Try to turn PAmts into a splat, since we don't care about the values
5222       // that are currently '0'. If we can't, just keep '0'`s.
5223       turnVectorIntoSplatVector(PAmts, isNullConstant);
5224       // Try to turn AAmts into a splat, since we don't care about the
5225       // values that are currently '-1'. If we can't, change them to '0'`s.
5226       turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
5227                                 DAG.getConstant(0, DL, SVT));
5228       // Try to turn KAmts into a splat, since we don't care about the values
5229       // that are currently '-1'. If we can't, change them to '0'`s.
5230       turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
5231                                 DAG.getConstant(0, DL, ShSVT));
5232     }
5233
5234     PVal = DAG.getBuildVector(VT, DL, PAmts);
5235     AVal = DAG.getBuildVector(VT, DL, AAmts);
5236     KVal = DAG.getBuildVector(ShVT, DL, KAmts);
5237     QVal = DAG.getBuildVector(VT, DL, QAmts);
5238   } else {
5239     PVal = PAmts[0];
5240     AVal = AAmts[0];
5241     KVal = KAmts[0];
5242     QVal = QAmts[0];
5243   }
5244
5245   // (mul N, P)
5246   SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
5247   Created.push_back(Op0.getNode());
5248
5249   if (NeedToApplyOffset) {
5250     // We need ADD to do this.
5251     if (!isOperationLegalOrCustom(ISD::ADD, VT))
5252       return SDValue();
5253
5254     // (add (mul N, P), A)
5255     Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
5256     Created.push_back(Op0.getNode());
5257   }
5258
5259   // Rotate right only if any divisor was even. We avoid rotates for all-odd
5260   // divisors as a performance improvement, since rotating by 0 is a no-op.
5261   if (HadEvenDivisor) {
5262     // We need ROTR to do this.
5263     if (!isOperationLegalOrCustom(ISD::ROTR, VT))
5264       return SDValue();
5265     SDNodeFlags Flags;
5266     Flags.setExact(true);
5267     // SREM: (rotr (add (mul N, P), A), K)
5268     Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
5269     Created.push_back(Op0.getNode());
5270   }
5271
5272   // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
5273   SDValue Fold =
5274       DAG.getSetCC(DL, SETCCVT, Op0, QVal,
5275                    ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
5276
5277   // If we didn't have lanes with INT_MIN divisor, then we're done.
5278   if (!HadIntMinDivisor)
5279     return Fold;
5280
5281   // That fold is only valid for positive divisors. Which effectively means,
5282   // it is invalid for INT_MIN divisors. So if we have such a lane,
5283   // we must fix-up results for said lanes.
5284   assert(VT.isVector() && "Can/should only get here for vectors.");
5285
5286   if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
5287       !isOperationLegalOrCustom(ISD::AND, VT) ||
5288       !isOperationLegalOrCustom(Cond, VT) ||
5289       !isOperationLegalOrCustom(ISD::VSELECT, VT))
5290     return SDValue();
5291
5292   Created.push_back(Fold.getNode());
5293
5294   SDValue IntMin = DAG.getConstant(
5295       APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
5296   SDValue IntMax = DAG.getConstant(
5297       APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
5298   SDValue Zero =
5299       DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
5300
5301   // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
5302   SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
5303   Created.push_back(DivisorIsIntMin.getNode());
5304
5305   // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
5306   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
5307   Created.push_back(Masked.getNode());
5308   SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
5309   Created.push_back(MaskedIsZero.getNode());
5310
5311   // To produce final result we need to blend 2 vectors: 'SetCC' and
5312   // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
5313   // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
5314   // constant-folded, select can get lowered to a shuffle with constant mask.
5315   SDValue Blended =
5316       DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
5317
5318   return Blended;
5319 }
5320
5321 bool TargetLowering::
5322 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
5323   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
5324     DAG.getContext()->emitError("argument to '__builtin_return_address' must "
5325                                 "be a constant integer");
5326     return true;
5327   }
5328
5329   return false;
5330 }
5331
5332 //===----------------------------------------------------------------------===//
5333 // Legalization Utilities
5334 //===----------------------------------------------------------------------===//
5335
5336 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
5337                                     SDValue LHS, SDValue RHS,
5338                                     SmallVectorImpl<SDValue> &Result,
5339                                     EVT HiLoVT, SelectionDAG &DAG,
5340                                     MulExpansionKind Kind, SDValue LL,
5341                                     SDValue LH, SDValue RL, SDValue RH) const {
5342   assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
5343          Opcode == ISD::SMUL_LOHI);
5344
5345   bool HasMULHS = (Kind == MulExpansionKind::Always) ||
5346                   isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
5347   bool HasMULHU = (Kind == MulExpansionKind::Always) ||
5348                   isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
5349   bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5350                       isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
5351   bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
5352                       isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
5353
5354   if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
5355     return false;
5356
5357   unsigned OuterBitSize = VT.getScalarSizeInBits();
5358   unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
5359   unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
5360   unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
5361
5362   // LL, LH, RL, and RH must be either all NULL or all set to a value.
5363   assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
5364          (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
5365
5366   SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
5367   auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
5368                           bool Signed) -> bool {
5369     if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
5370       Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
5371       Hi = SDValue(Lo.getNode(), 1);
5372       return true;
5373     }
5374     if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
5375       Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
5376       Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
5377       return true;
5378     }
5379     return false;
5380   };
5381
5382   SDValue Lo, Hi;
5383
5384   if (!LL.getNode() && !RL.getNode() &&
5385       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5386     LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
5387     RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
5388   }
5389
5390   if (!LL.getNode())
5391     return false;
5392
5393   APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
5394   if (DAG.MaskedValueIsZero(LHS, HighMask) &&
5395       DAG.MaskedValueIsZero(RHS, HighMask)) {
5396     // The inputs are both zero-extended.
5397     if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
5398       Result.push_back(Lo);
5399       Result.push_back(Hi);
5400       if (Opcode != ISD::MUL) {
5401         SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5402         Result.push_back(Zero);
5403         Result.push_back(Zero);
5404       }
5405       return true;
5406     }
5407   }
5408
5409   if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
5410       RHSSB > InnerBitSize) {
5411     // The input values are both sign-extended.
5412     // TODO non-MUL case?
5413     if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
5414       Result.push_back(Lo);
5415       Result.push_back(Hi);
5416       return true;
5417     }
5418   }
5419
5420   unsigned ShiftAmount = OuterBitSize - InnerBitSize;
5421   EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
5422   if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
5423     // FIXME getShiftAmountTy does not always return a sensible result when VT
5424     // is an illegal type, and so the type may be too small to fit the shift
5425     // amount. Override it with i32. The shift will have to be legalized.
5426     ShiftAmountTy = MVT::i32;
5427   }
5428   SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
5429
5430   if (!LH.getNode() && !RH.getNode() &&
5431       isOperationLegalOrCustom(ISD::SRL, VT) &&
5432       isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
5433     LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
5434     LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
5435     RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
5436     RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
5437   }
5438
5439   if (!LH.getNode())
5440     return false;
5441
5442   if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
5443     return false;
5444
5445   Result.push_back(Lo);
5446
5447   if (Opcode == ISD::MUL) {
5448     RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
5449     LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
5450     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
5451     Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
5452     Result.push_back(Hi);
5453     return true;
5454   }
5455
5456   // Compute the full width result.
5457   auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
5458     Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
5459     Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5460     Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
5461     return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
5462   };
5463
5464   SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
5465   if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
5466     return false;
5467
5468   // This is effectively the add part of a multiply-add of half-sized operands,
5469   // so it cannot overflow.
5470   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5471
5472   if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
5473     return false;
5474
5475   SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
5476   EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5477
5478   bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
5479                   isOperationLegalOrCustom(ISD::ADDE, VT));
5480   if (UseGlue)
5481     Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
5482                        Merge(Lo, Hi));
5483   else
5484     Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
5485                        Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
5486
5487   SDValue Carry = Next.getValue(1);
5488   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5489   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5490
5491   if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
5492     return false;
5493
5494   if (UseGlue)
5495     Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
5496                      Carry);
5497   else
5498     Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
5499                      Zero, Carry);
5500
5501   Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
5502
5503   if (Opcode == ISD::SMUL_LOHI) {
5504     SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5505                                   DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
5506     Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
5507
5508     NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
5509                           DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
5510     Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
5511   }
5512
5513   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5514   Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
5515   Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
5516   return true;
5517 }
5518
5519 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
5520                                SelectionDAG &DAG, MulExpansionKind Kind,
5521                                SDValue LL, SDValue LH, SDValue RL,
5522                                SDValue RH) const {
5523   SmallVector<SDValue, 2> Result;
5524   bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
5525                            N->getOperand(0), N->getOperand(1), Result, HiLoVT,
5526                            DAG, Kind, LL, LH, RL, RH);
5527   if (Ok) {
5528     assert(Result.size() == 2);
5529     Lo = Result[0];
5530     Hi = Result[1];
5531   }
5532   return Ok;
5533 }
5534
5535 bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
5536                                        SelectionDAG &DAG) const {
5537   EVT VT = Node->getValueType(0);
5538
5539   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5540                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5541                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5542                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
5543     return false;
5544
5545   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5546   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5547   SDValue X = Node->getOperand(0);
5548   SDValue Y = Node->getOperand(1);
5549   SDValue Z = Node->getOperand(2);
5550
5551   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5552   bool IsFSHL = Node->getOpcode() == ISD::FSHL;
5553   SDLoc DL(SDValue(Node, 0));
5554
5555   EVT ShVT = Z.getValueType();
5556   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5557   SDValue Zero = DAG.getConstant(0, DL, ShVT);
5558
5559   SDValue ShAmt;
5560   if (isPowerOf2_32(EltSizeInBits)) {
5561     SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5562     ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
5563   } else {
5564     ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
5565   }
5566
5567   SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
5568   SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
5569   SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
5570   SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
5571
5572   // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
5573   // and that is undefined. We must compare and select to avoid UB.
5574   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT);
5575
5576   // For fshl, 0-shift returns the 1st arg (X).
5577   // For fshr, 0-shift returns the 2nd arg (Y).
5578   SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ);
5579   Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? X : Y, Or);
5580   return true;
5581 }
5582
5583 // TODO: Merge with expandFunnelShift.
5584 bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
5585                                SelectionDAG &DAG) const {
5586   EVT VT = Node->getValueType(0);
5587   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5588   bool IsLeft = Node->getOpcode() == ISD::ROTL;
5589   SDValue Op0 = Node->getOperand(0);
5590   SDValue Op1 = Node->getOperand(1);
5591   SDLoc DL(SDValue(Node, 0));
5592
5593   EVT ShVT = Op1.getValueType();
5594   SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
5595
5596   // If a rotate in the other direction is legal, use it.
5597   unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
5598   if (isOperationLegal(RevRot, VT)) {
5599     SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5600     Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
5601     return true;
5602   }
5603
5604   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
5605                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5606                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5607                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
5608                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
5609     return false;
5610
5611   // Otherwise,
5612   //   (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
5613   //   (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
5614   //
5615   assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 &&
5616          "Expecting the type bitwidth to be a power of 2");
5617   unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
5618   unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
5619   SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
5620   SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1);
5621   SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
5622   SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
5623   Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0),
5624                        DAG.getNode(HsOpc, DL, VT, Op0, And1));
5625   return true;
5626 }
5627
5628 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
5629                                       SelectionDAG &DAG) const {
5630   unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
5631   SDValue Src = Node->getOperand(OpNo);
5632   EVT SrcVT = Src.getValueType();
5633   EVT DstVT = Node->getValueType(0);
5634   SDLoc dl(SDValue(Node, 0));
5635
5636   // FIXME: Only f32 to i64 conversions are supported.
5637   if (SrcVT != MVT::f32 || DstVT != MVT::i64)
5638     return false;
5639
5640   if (Node->isStrictFPOpcode())
5641     // When a NaN is converted to an integer a trap is allowed. We can't
5642     // use this expansion here because it would eliminate that trap. Other
5643     // traps are also allowed and cannot be eliminated. See
5644     // IEEE 754-2008 sec 5.8.
5645     return false;
5646
5647   // Expand f32 -> i64 conversion
5648   // This algorithm comes from compiler-rt's implementation of fixsfdi:
5649   // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
5650   unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5651   EVT IntVT = SrcVT.changeTypeToInteger();
5652   EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
5653
5654   SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
5655   SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
5656   SDValue Bias = DAG.getConstant(127, dl, IntVT);
5657   SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
5658   SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
5659   SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
5660
5661   SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
5662
5663   SDValue ExponentBits = DAG.getNode(
5664       ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
5665       DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
5666   SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
5667
5668   SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
5669                              DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
5670                              DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
5671   Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
5672
5673   SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
5674                           DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
5675                           DAG.getConstant(0x00800000, dl, IntVT));
5676
5677   R = DAG.getZExtOrTrunc(R, dl, DstVT);
5678
5679   R = DAG.getSelectCC(
5680       dl, Exponent, ExponentLoBit,
5681       DAG.getNode(ISD::SHL, dl, DstVT, R,
5682                   DAG.getZExtOrTrunc(
5683                       DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
5684                       dl, IntShVT)),
5685       DAG.getNode(ISD::SRL, dl, DstVT, R,
5686                   DAG.getZExtOrTrunc(
5687                       DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
5688                       dl, IntShVT)),
5689       ISD::SETGT);
5690
5691   SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
5692                             DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
5693
5694   Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
5695                            DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
5696   return true;
5697 }
5698
5699 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
5700                                       SDValue &Chain,
5701                                       SelectionDAG &DAG) const {
5702   SDLoc dl(SDValue(Node, 0));
5703   unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
5704   SDValue Src = Node->getOperand(OpNo);
5705
5706   EVT SrcVT = Src.getValueType();
5707   EVT DstVT = Node->getValueType(0);
5708   EVT SetCCVT =
5709       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
5710
5711   // Only expand vector types if we have the appropriate vector bit operations.
5712   unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
5713                                                    ISD::FP_TO_SINT;
5714   if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
5715                            !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
5716     return false;
5717
5718   // If the maximum float value is smaller then the signed integer range,
5719   // the destination signmask can't be represented by the float, so we can
5720   // just use FP_TO_SINT directly.
5721   const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
5722   APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
5723   APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
5724   if (APFloat::opOverflow &
5725       APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
5726     if (Node->isStrictFPOpcode()) {
5727       Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
5728                            { Node->getOperand(0), Src });
5729       Chain = Result.getValue(1);
5730     } else
5731       Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
5732     return true;
5733   }
5734
5735   SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
5736   SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
5737
5738   bool Strict = Node->isStrictFPOpcode() ||
5739                 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
5740
5741   if (Strict) {
5742     // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
5743     // signmask then offset (the result of which should be fully representable).
5744     // Sel = Src < 0x8000000000000000
5745     // Val = select Sel, Src, Src - 0x8000000000000000
5746     // Ofs = select Sel, 0, 0x8000000000000000
5747     // Result = fp_to_sint(Val) ^ Ofs
5748
5749     // TODO: Should any fast-math-flags be set for the FSUB?
5750     SDValue SrcBiased;
5751     if (Node->isStrictFPOpcode())
5752       SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
5753                               { Node->getOperand(0), Src, Cst });
5754     else
5755       SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
5756     SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
5757     SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
5758                                 DAG.getConstant(SignMask, dl, DstVT));
5759     SDValue SInt;
5760     if (Node->isStrictFPOpcode()) {
5761       SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
5762                          { SrcBiased.getValue(1), Val });
5763       Chain = SInt.getValue(1);
5764     } else
5765       SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
5766     Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
5767   } else {
5768     // Expand based on maximum range of FP_TO_SINT:
5769     // True = fp_to_sint(Src)
5770     // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
5771     // Result = select (Src < 0x8000000000000000), True, False
5772
5773     SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
5774     // TODO: Should any fast-math-flags be set for the FSUB?
5775     SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
5776                                 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
5777     False = DAG.getNode(ISD::XOR, dl, DstVT, False,
5778                         DAG.getConstant(SignMask, dl, DstVT));
5779     Result = DAG.getSelect(dl, DstVT, Sel, True, False);
5780   }
5781   return true;
5782 }
5783
5784 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
5785                                       SelectionDAG &DAG) const {
5786   SDValue Src = Node->getOperand(0);
5787   EVT SrcVT = Src.getValueType();
5788   EVT DstVT = Node->getValueType(0);
5789
5790   if (SrcVT.getScalarType() != MVT::i64)
5791     return false;
5792
5793   SDLoc dl(SDValue(Node, 0));
5794   EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
5795
5796   if (DstVT.getScalarType() == MVT::f32) {
5797     // Only expand vector types if we have the appropriate vector bit
5798     // operations.
5799     if (SrcVT.isVector() &&
5800         (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
5801          !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
5802          !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) ||
5803          !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
5804          !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
5805       return false;
5806
5807     // For unsigned conversions, convert them to signed conversions using the
5808     // algorithm from the x86_64 __floatundidf in compiler_rt.
5809     SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
5810
5811     SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT);
5812     SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst);
5813     SDValue AndConst = DAG.getConstant(1, dl, SrcVT);
5814     SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
5815     SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
5816
5817     SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
5818     SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
5819
5820     // TODO: This really should be implemented using a branch rather than a
5821     // select.  We happen to get lucky and machinesink does the right
5822     // thing most of the time.  This would be a good candidate for a
5823     // pseudo-op, or, even better, for whole-function isel.
5824     EVT SetCCVT =
5825         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
5826
5827     SDValue SignBitTest = DAG.getSetCC(
5828         dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
5829     Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast);
5830     return true;
5831   }
5832
5833   if (DstVT.getScalarType() == MVT::f64) {
5834     // Only expand vector types if we have the appropriate vector bit
5835     // operations.
5836     if (SrcVT.isVector() &&
5837         (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
5838          !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
5839          !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
5840          !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
5841          !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
5842       return false;
5843
5844     // Implementation of unsigned i64 to f64 following the algorithm in
5845     // __floatundidf in compiler_rt. This implementation has the advantage
5846     // of performing rounding correctly, both in the default rounding mode
5847     // and in all alternate rounding modes.
5848     SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
5849     SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
5850         BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
5851     SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
5852     SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
5853     SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
5854
5855     SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
5856     SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
5857     SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
5858     SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
5859     SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
5860     SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
5861     SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
5862     Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
5863     return true;
5864   }
5865
5866   return false;
5867 }
5868
5869 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
5870                                               SelectionDAG &DAG) const {
5871   SDLoc dl(Node);
5872   unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
5873     ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
5874   EVT VT = Node->getValueType(0);
5875   if (isOperationLegalOrCustom(NewOp, VT)) {
5876     SDValue Quiet0 = Node->getOperand(0);
5877     SDValue Quiet1 = Node->getOperand(1);
5878
5879     if (!Node->getFlags().hasNoNaNs()) {
5880       // Insert canonicalizes if it's possible we need to quiet to get correct
5881       // sNaN behavior.
5882       if (!DAG.isKnownNeverSNaN(Quiet0)) {
5883         Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
5884                              Node->getFlags());
5885       }
5886       if (!DAG.isKnownNeverSNaN(Quiet1)) {
5887         Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
5888                              Node->getFlags());
5889       }
5890     }
5891
5892     return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
5893   }
5894
5895   // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
5896   // instead if there are no NaNs.
5897   if (Node->getFlags().hasNoNaNs()) {
5898     unsigned IEEE2018Op =
5899         Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
5900     if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
5901       return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
5902                          Node->getOperand(1), Node->getFlags());
5903     }
5904   }
5905
5906   return SDValue();
5907 }
5908
5909 bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
5910                                  SelectionDAG &DAG) const {
5911   SDLoc dl(Node);
5912   EVT VT = Node->getValueType(0);
5913   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5914   SDValue Op = Node->getOperand(0);
5915   unsigned Len = VT.getScalarSizeInBits();
5916   assert(VT.isInteger() && "CTPOP not implemented for this type.");
5917
5918   // TODO: Add support for irregular type lengths.
5919   if (!(Len <= 128 && Len % 8 == 0))
5920     return false;
5921
5922   // Only expand vector types if we have the appropriate vector bit operations.
5923   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
5924                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
5925                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
5926                         (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
5927                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
5928     return false;
5929
5930   // This is the "best" algorithm from
5931   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
5932   SDValue Mask55 =
5933       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
5934   SDValue Mask33 =
5935       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
5936   SDValue Mask0F =
5937       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
5938   SDValue Mask01 =
5939       DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
5940
5941   // v = v - ((v >> 1) & 0x55555555...)
5942   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
5943                    DAG.getNode(ISD::AND, dl, VT,
5944                                DAG.getNode(ISD::SRL, dl, VT, Op,
5945                                            DAG.getConstant(1, dl, ShVT)),
5946                                Mask55));
5947   // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
5948   Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
5949                    DAG.getNode(ISD::AND, dl, VT,
5950                                DAG.getNode(ISD::SRL, dl, VT, Op,
5951                                            DAG.getConstant(2, dl, ShVT)),
5952                                Mask33));
5953   // v = (v + (v >> 4)) & 0x0F0F0F0F...
5954   Op = DAG.getNode(ISD::AND, dl, VT,
5955                    DAG.getNode(ISD::ADD, dl, VT, Op,
5956                                DAG.getNode(ISD::SRL, dl, VT, Op,
5957                                            DAG.getConstant(4, dl, ShVT))),
5958                    Mask0F);
5959   // v = (v * 0x01010101...) >> (Len - 8)
5960   if (Len > 8)
5961     Op =
5962         DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
5963                     DAG.getConstant(Len - 8, dl, ShVT));
5964
5965   Result = Op;
5966   return true;
5967 }
5968
5969 bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
5970                                 SelectionDAG &DAG) const {
5971   SDLoc dl(Node);
5972   EVT VT = Node->getValueType(0);
5973   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5974   SDValue Op = Node->getOperand(0);
5975   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5976
5977   // If the non-ZERO_UNDEF version is supported we can use that instead.
5978   if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
5979       isOperationLegalOrCustom(ISD::CTLZ, VT)) {
5980     Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
5981     return true;
5982   }
5983
5984   // If the ZERO_UNDEF version is supported use that and handle the zero case.
5985   if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
5986     EVT SetCCVT =
5987         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5988     SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
5989     SDValue Zero = DAG.getConstant(0, dl, VT);
5990     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
5991     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
5992                          DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
5993     return true;
5994   }
5995
5996   // Only expand vector types if we have the appropriate vector bit operations.
5997   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
5998                         !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
5999                         !isOperationLegalOrCustom(ISD::SRL, VT) ||
6000                         !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
6001     return false;
6002
6003   // for now, we do this:
6004   // x = x | (x >> 1);
6005   // x = x | (x >> 2);
6006   // ...
6007   // x = x | (x >>16);
6008   // x = x | (x >>32); // for 64-bit input
6009   // return popcount(~x);
6010   //
6011   // Ref: "Hacker's Delight" by Henry Warren
6012   for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
6013     SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
6014     Op = DAG.getNode(ISD::OR, dl, VT, Op,
6015                      DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
6016   }
6017   Op = DAG.getNOT(dl, Op, VT);
6018   Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
6019   return true;
6020 }
6021
6022 bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
6023                                 SelectionDAG &DAG) const {
6024   SDLoc dl(Node);
6025   EVT VT = Node->getValueType(0);
6026   SDValue Op = Node->getOperand(0);
6027   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
6028
6029   // If the non-ZERO_UNDEF version is supported we can use that instead.
6030   if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
6031       isOperationLegalOrCustom(ISD::CTTZ, VT)) {
6032     Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
6033     return true;
6034   }
6035
6036   // If the ZERO_UNDEF version is supported use that and handle the zero case.
6037   if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
6038     EVT SetCCVT =
6039         getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6040     SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
6041     SDValue Zero = DAG.getConstant(0, dl, VT);
6042     SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
6043     Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
6044                          DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
6045     return true;
6046   }
6047
6048   // Only expand vector types if we have the appropriate vector bit operations.
6049   if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
6050                         (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
6051                          !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
6052                         !isOperationLegalOrCustom(ISD::SUB, VT) ||
6053                         !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
6054                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6055     return false;
6056
6057   // for now, we use: { return popcount(~x & (x - 1)); }
6058   // unless the target has ctlz but not ctpop, in which case we use:
6059   // { return 32 - nlz(~x & (x-1)); }
6060   // Ref: "Hacker's Delight" by Henry Warren
6061   SDValue Tmp = DAG.getNode(
6062       ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
6063       DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
6064
6065   // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
6066   if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
6067     Result =
6068         DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
6069                     DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
6070     return true;
6071   }
6072
6073   Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
6074   return true;
6075 }
6076
6077 bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
6078                                SelectionDAG &DAG) const {
6079   SDLoc dl(N);
6080   EVT VT = N->getValueType(0);
6081   EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6082   SDValue Op = N->getOperand(0);
6083
6084   // Only expand vector types if we have the appropriate vector operations.
6085   if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) ||
6086                         !isOperationLegalOrCustom(ISD::ADD, VT) ||
6087                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
6088     return false;
6089
6090   SDValue Shift =
6091       DAG.getNode(ISD::SRA, dl, VT, Op,
6092                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
6093   SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
6094   Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
6095   return true;
6096 }
6097
6098 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
6099                                             SelectionDAG &DAG) const {
6100   SDLoc SL(LD);
6101   SDValue Chain = LD->getChain();
6102   SDValue BasePTR = LD->getBasePtr();
6103   EVT SrcVT = LD->getMemoryVT();
6104   ISD::LoadExtType ExtType = LD->getExtensionType();
6105
6106   unsigned NumElem = SrcVT.getVectorNumElements();
6107
6108   EVT SrcEltVT = SrcVT.getScalarType();
6109   EVT DstEltVT = LD->getValueType(0).getScalarType();
6110
6111   unsigned Stride = SrcEltVT.getSizeInBits() / 8;
6112   assert(SrcEltVT.isByteSized());
6113
6114   SmallVector<SDValue, 8> Vals;
6115   SmallVector<SDValue, 8> LoadChains;
6116
6117   for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6118     SDValue ScalarLoad =
6119         DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
6120                        LD->getPointerInfo().getWithOffset(Idx * Stride),
6121                        SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
6122                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
6123
6124     BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
6125
6126     Vals.push_back(ScalarLoad.getValue(0));
6127     LoadChains.push_back(ScalarLoad.getValue(1));
6128   }
6129
6130   SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
6131   SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
6132
6133   return DAG.getMergeValues({Value, NewChain}, SL);
6134 }
6135
6136 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
6137                                              SelectionDAG &DAG) const {
6138   SDLoc SL(ST);
6139
6140   SDValue Chain = ST->getChain();
6141   SDValue BasePtr = ST->getBasePtr();
6142   SDValue Value = ST->getValue();
6143   EVT StVT = ST->getMemoryVT();
6144
6145   // The type of the data we want to save
6146   EVT RegVT = Value.getValueType();
6147   EVT RegSclVT = RegVT.getScalarType();
6148
6149   // The type of data as saved in memory.
6150   EVT MemSclVT = StVT.getScalarType();
6151
6152   EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
6153   unsigned NumElem = StVT.getVectorNumElements();
6154
6155   // A vector must always be stored in memory as-is, i.e. without any padding
6156   // between the elements, since various code depend on it, e.g. in the
6157   // handling of a bitcast of a vector type to int, which may be done with a
6158   // vector store followed by an integer load. A vector that does not have
6159   // elements that are byte-sized must therefore be stored as an integer
6160   // built out of the extracted vector elements.
6161   if (!MemSclVT.isByteSized()) {
6162     unsigned NumBits = StVT.getSizeInBits();
6163     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
6164
6165     SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
6166
6167     for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6168       SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
6169                                 DAG.getConstant(Idx, SL, IdxVT));
6170       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
6171       SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
6172       unsigned ShiftIntoIdx =
6173           (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
6174       SDValue ShiftAmount =
6175           DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
6176       SDValue ShiftedElt =
6177           DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
6178       CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
6179     }
6180
6181     return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
6182                         ST->getAlignment(), ST->getMemOperand()->getFlags(),
6183                         ST->getAAInfo());
6184   }
6185
6186   // Store Stride in bytes
6187   unsigned Stride = MemSclVT.getSizeInBits() / 8;
6188   assert(Stride && "Zero stride!");
6189   // Extract each of the elements from the original vector and save them into
6190   // memory individually.
6191   SmallVector<SDValue, 8> Stores;
6192   for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
6193     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
6194                               DAG.getConstant(Idx, SL, IdxVT));
6195
6196     SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
6197
6198     // This scalar TruncStore may be illegal, but we legalize it later.
6199     SDValue Store = DAG.getTruncStore(
6200         Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
6201         MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
6202         ST->getMemOperand()->getFlags(), ST->getAAInfo());
6203
6204     Stores.push_back(Store);
6205   }
6206
6207   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
6208 }
6209
6210 std::pair<SDValue, SDValue>
6211 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
6212   assert(LD->getAddressingMode() == ISD::UNINDEXED &&
6213          "unaligned indexed loads not implemented!");
6214   SDValue Chain = LD->getChain();
6215   SDValue Ptr = LD->getBasePtr();
6216   EVT VT = LD->getValueType(0);
6217   EVT LoadedVT = LD->getMemoryVT();
6218   SDLoc dl(LD);
6219   auto &MF = DAG.getMachineFunction();
6220
6221   if (VT.isFloatingPoint() || VT.isVector()) {
6222     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
6223     if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
6224       if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
6225           LoadedVT.isVector()) {
6226         // Scalarize the load and let the individual components be handled.
6227         SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
6228         if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
6229           return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
6230         return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
6231       }
6232
6233       // Expand to a (misaligned) integer load of the same size,
6234       // then bitconvert to floating point or vector.
6235       SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
6236                                     LD->getMemOperand());
6237       SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
6238       if (LoadedVT != VT)
6239         Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
6240                              ISD::ANY_EXTEND, dl, VT, Result);
6241
6242       return std::make_pair(Result, newLoad.getValue(1));
6243     }
6244
6245     // Copy the value to a (aligned) stack slot using (unaligned) integer
6246     // loads and stores, then do a (aligned) load from the stack slot.
6247     MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
6248     unsigned LoadedBytes = LoadedVT.getStoreSize();
6249     unsigned RegBytes = RegVT.getSizeInBits() / 8;
6250     unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
6251
6252     // Make sure the stack slot is also aligned for the register type.
6253     SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
6254     auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
6255     SmallVector<SDValue, 8> Stores;
6256     SDValue StackPtr = StackBase;
6257     unsigned Offset = 0;
6258
6259     EVT PtrVT = Ptr.getValueType();
6260     EVT StackPtrVT = StackPtr.getValueType();
6261
6262     SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6263     SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6264
6265     // Do all but one copies using the full register width.
6266     for (unsigned i = 1; i < NumRegs; i++) {
6267       // Load one integer register's worth from the original location.
6268       SDValue Load = DAG.getLoad(
6269           RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
6270           MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
6271           LD->getAAInfo());
6272       // Follow the load with a store to the stack slot.  Remember the store.
6273       Stores.push_back(DAG.getStore(
6274           Load.getValue(1), dl, Load, StackPtr,
6275           MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
6276       // Increment the pointers.
6277       Offset += RegBytes;
6278
6279       Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6280       StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6281     }
6282
6283     // The last copy may be partial.  Do an extending load.
6284     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
6285                                   8 * (LoadedBytes - Offset));
6286     SDValue Load =
6287         DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
6288                        LD->getPointerInfo().getWithOffset(Offset), MemVT,
6289                        MinAlign(LD->getAlignment(), Offset),
6290                        LD->getMemOperand()->getFlags(), LD->getAAInfo());
6291     // Follow the load with a store to the stack slot.  Remember the store.
6292     // On big-endian machines this requires a truncating store to ensure
6293     // that the bits end up in the right place.
6294     Stores.push_back(DAG.getTruncStore(
6295         Load.getValue(1), dl, Load, StackPtr,
6296         MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
6297
6298     // The order of the stores doesn't matter - say it with a TokenFactor.
6299     SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6300
6301     // Finally, perform the original load only redirected to the stack slot.
6302     Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
6303                           MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
6304                           LoadedVT);
6305
6306     // Callers expect a MERGE_VALUES node.
6307     return std::make_pair(Load, TF);
6308   }
6309
6310   assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
6311          "Unaligned load of unsupported type.");
6312
6313   // Compute the new VT that is half the size of the old one.  This is an
6314   // integer MVT.
6315   unsigned NumBits = LoadedVT.getSizeInBits();
6316   EVT NewLoadedVT;
6317   NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
6318   NumBits >>= 1;
6319
6320   unsigned Alignment = LD->getAlignment();
6321   unsigned IncrementSize = NumBits / 8;
6322   ISD::LoadExtType HiExtType = LD->getExtensionType();
6323
6324   // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
6325   if (HiExtType == ISD::NON_EXTLOAD)
6326     HiExtType = ISD::ZEXTLOAD;
6327
6328   // Load the value in two parts
6329   SDValue Lo, Hi;
6330   if (DAG.getDataLayout().isLittleEndian()) {
6331     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
6332                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
6333                         LD->getAAInfo());
6334
6335     Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6336     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
6337                         LD->getPointerInfo().getWithOffset(IncrementSize),
6338                         NewLoadedVT, MinAlign(Alignment, IncrementSize),
6339                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
6340   } else {
6341     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
6342                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
6343                         LD->getAAInfo());
6344
6345     Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6346     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
6347                         LD->getPointerInfo().getWithOffset(IncrementSize),
6348                         NewLoadedVT, MinAlign(Alignment, IncrementSize),
6349                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
6350   }
6351
6352   // aggregate the two parts
6353   SDValue ShiftAmount =
6354       DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
6355                                                     DAG.getDataLayout()));
6356   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
6357   Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
6358
6359   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
6360                              Hi.getValue(1));
6361
6362   return std::make_pair(Result, TF);
6363 }
6364
6365 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
6366                                              SelectionDAG &DAG) const {
6367   assert(ST->getAddressingMode() == ISD::UNINDEXED &&
6368          "unaligned indexed stores not implemented!");
6369   SDValue Chain = ST->getChain();
6370   SDValue Ptr = ST->getBasePtr();
6371   SDValue Val = ST->getValue();
6372   EVT VT = Val.getValueType();
6373   int Alignment = ST->getAlignment();
6374   auto &MF = DAG.getMachineFunction();
6375   EVT StoreMemVT = ST->getMemoryVT();
6376
6377   SDLoc dl(ST);
6378   if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
6379     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
6380     if (isTypeLegal(intVT)) {
6381       if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
6382           StoreMemVT.isVector()) {
6383         // Scalarize the store and let the individual components be handled.
6384         SDValue Result = scalarizeVectorStore(ST, DAG);
6385         return Result;
6386       }
6387       // Expand to a bitconvert of the value to the integer type of the
6388       // same size, then a (misaligned) int store.
6389       // FIXME: Does not handle truncating floating point stores!
6390       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
6391       Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
6392                             Alignment, ST->getMemOperand()->getFlags());
6393       return Result;
6394     }
6395     // Do a (aligned) store to a stack slot, then copy from the stack slot
6396     // to the final destination using (unaligned) integer loads and stores.
6397     MVT RegVT = getRegisterType(
6398         *DAG.getContext(),
6399         EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
6400     EVT PtrVT = Ptr.getValueType();
6401     unsigned StoredBytes = StoreMemVT.getStoreSize();
6402     unsigned RegBytes = RegVT.getSizeInBits() / 8;
6403     unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
6404
6405     // Make sure the stack slot is also aligned for the register type.
6406     SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
6407     auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6408
6409     // Perform the original store, only redirected to the stack slot.
6410     SDValue Store = DAG.getTruncStore(
6411         Chain, dl, Val, StackPtr,
6412         MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
6413
6414     EVT StackPtrVT = StackPtr.getValueType();
6415
6416     SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
6417     SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
6418     SmallVector<SDValue, 8> Stores;
6419     unsigned Offset = 0;
6420
6421     // Do all but one copies using the full register width.
6422     for (unsigned i = 1; i < NumRegs; i++) {
6423       // Load one integer register's worth from the stack slot.
6424       SDValue Load = DAG.getLoad(
6425           RegVT, dl, Store, StackPtr,
6426           MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
6427       // Store it to the final location.  Remember the store.
6428       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
6429                                     ST->getPointerInfo().getWithOffset(Offset),
6430                                     MinAlign(ST->getAlignment(), Offset),
6431                                     ST->getMemOperand()->getFlags()));
6432       // Increment the pointers.
6433       Offset += RegBytes;
6434       StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
6435       Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
6436     }
6437
6438     // The last store may be partial.  Do a truncating store.  On big-endian
6439     // machines this requires an extending load from the stack slot to ensure
6440     // that the bits are in the right place.
6441     EVT LoadMemVT =
6442         EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
6443
6444     // Load from the stack slot.
6445     SDValue Load = DAG.getExtLoad(
6446         ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
6447         MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
6448
6449     Stores.push_back(
6450         DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
6451                           ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
6452                           MinAlign(ST->getAlignment(), Offset),
6453                           ST->getMemOperand()->getFlags(), ST->getAAInfo()));
6454     // The order of the stores doesn't matter - say it with a TokenFactor.
6455     SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6456     return Result;
6457   }
6458
6459   assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
6460          "Unaligned store of unknown type.");
6461   // Get the half-size VT
6462   EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
6463   int NumBits = NewStoredVT.getSizeInBits();
6464   int IncrementSize = NumBits / 8;
6465
6466   // Divide the stored value in two parts.
6467   SDValue ShiftAmount = DAG.getConstant(
6468       NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
6469   SDValue Lo = Val;
6470   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
6471
6472   // Store the two parts
6473   SDValue Store1, Store2;
6474   Store1 = DAG.getTruncStore(Chain, dl,
6475                              DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
6476                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
6477                              ST->getMemOperand()->getFlags());
6478
6479   Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
6480   Alignment = MinAlign(Alignment, IncrementSize);
6481   Store2 = DAG.getTruncStore(
6482       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
6483       ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
6484       ST->getMemOperand()->getFlags(), ST->getAAInfo());
6485
6486   SDValue Result =
6487       DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
6488   return Result;
6489 }
6490
6491 SDValue
6492 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
6493                                        const SDLoc &DL, EVT DataVT,
6494                                        SelectionDAG &DAG,
6495                                        bool IsCompressedMemory) const {
6496   SDValue Increment;
6497   EVT AddrVT = Addr.getValueType();
6498   EVT MaskVT = Mask.getValueType();
6499   assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
6500          "Incompatible types of Data and Mask");
6501   if (IsCompressedMemory) {
6502     // Incrementing the pointer according to number of '1's in the mask.
6503     EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
6504     SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
6505     if (MaskIntVT.getSizeInBits() < 32) {
6506       MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
6507       MaskIntVT = MVT::i32;
6508     }
6509
6510     // Count '1's with POPCNT.
6511     Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
6512     Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
6513     // Scale is an element size in bytes.
6514     SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
6515                                     AddrVT);
6516     Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
6517   } else
6518     Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
6519
6520   return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
6521 }
6522
6523 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
6524                                        SDValue Idx,
6525                                        EVT VecVT,
6526                                        const SDLoc &dl) {
6527   if (isa<ConstantSDNode>(Idx))
6528     return Idx;
6529
6530   EVT IdxVT = Idx.getValueType();
6531   unsigned NElts = VecVT.getVectorNumElements();
6532   if (isPowerOf2_32(NElts)) {
6533     APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
6534                                      Log2_32(NElts));
6535     return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
6536                        DAG.getConstant(Imm, dl, IdxVT));
6537   }
6538
6539   return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
6540                      DAG.getConstant(NElts - 1, dl, IdxVT));
6541 }
6542
6543 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
6544                                                 SDValue VecPtr, EVT VecVT,
6545                                                 SDValue Index) const {
6546   SDLoc dl(Index);
6547   // Make sure the index type is big enough to compute in.
6548   Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
6549
6550   EVT EltVT = VecVT.getVectorElementType();
6551
6552   // Calculate the element offset and add it to the pointer.
6553   unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
6554   assert(EltSize * 8 == EltVT.getSizeInBits() &&
6555          "Converting bits to bytes lost precision");
6556
6557   Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
6558
6559   EVT IdxVT = Index.getValueType();
6560
6561   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
6562                       DAG.getConstant(EltSize, dl, IdxVT));
6563   return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
6564 }
6565
6566 //===----------------------------------------------------------------------===//
6567 // Implementation of Emulated TLS Model
6568 //===----------------------------------------------------------------------===//
6569
6570 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
6571                                                 SelectionDAG &DAG) const {
6572   // Access to address of TLS varialbe xyz is lowered to a function call:
6573   //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
6574   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6575   PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
6576   SDLoc dl(GA);
6577
6578   ArgListTy Args;
6579   ArgListEntry Entry;
6580   std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
6581   Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
6582   StringRef EmuTlsVarName(NameString);
6583   GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
6584   assert(EmuTlsVar && "Cannot find EmuTlsVar ");
6585   Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
6586   Entry.Ty = VoidPtrType;
6587   Args.push_back(Entry);
6588
6589   SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
6590
6591   TargetLowering::CallLoweringInfo CLI(DAG);
6592   CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
6593   CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
6594   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
6595
6596   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
6597   // At last for X86 targets, maybe good for other targets too?
6598   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6599   MFI.setAdjustsStack(true); // Is this only for X86 target?
6600   MFI.setHasCalls(true);
6601
6602   assert((GA->getOffset() == 0) &&
6603          "Emulated TLS must have zero offset in GlobalAddressSDNode");
6604   return CallResult.first;
6605 }
6606
6607 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
6608                                                 SelectionDAG &DAG) const {
6609   assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
6610   if (!isCtlzFast())
6611     return SDValue();
6612   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
6613   SDLoc dl(Op);
6614   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6615     if (C->isNullValue() && CC == ISD::SETEQ) {
6616       EVT VT = Op.getOperand(0).getValueType();
6617       SDValue Zext = Op.getOperand(0);
6618       if (VT.bitsLT(MVT::i32)) {
6619         VT = MVT::i32;
6620         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
6621       }
6622       unsigned Log2b = Log2_32(VT.getSizeInBits());
6623       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
6624       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
6625                                 DAG.getConstant(Log2b, dl, MVT::i32));
6626       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
6627     }
6628   }
6629   return SDValue();
6630 }
6631
6632 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
6633   unsigned Opcode = Node->getOpcode();
6634   SDValue LHS = Node->getOperand(0);
6635   SDValue RHS = Node->getOperand(1);
6636   EVT VT = LHS.getValueType();
6637   SDLoc dl(Node);
6638
6639   assert(VT == RHS.getValueType() && "Expected operands to be the same type");
6640   assert(VT.isInteger() && "Expected operands to be integers");
6641
6642   // usub.sat(a, b) -> umax(a, b) - b
6643   if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
6644     SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
6645     return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
6646   }
6647
6648   if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
6649     SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
6650     SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
6651     return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
6652   }
6653
6654   unsigned OverflowOp;
6655   switch (Opcode) {
6656   case ISD::SADDSAT:
6657     OverflowOp = ISD::SADDO;
6658     break;
6659   case ISD::UADDSAT:
6660     OverflowOp = ISD::UADDO;
6661     break;
6662   case ISD::SSUBSAT:
6663     OverflowOp = ISD::SSUBO;
6664     break;
6665   case ISD::USUBSAT:
6666     OverflowOp = ISD::USUBO;
6667     break;
6668   default:
6669     llvm_unreachable("Expected method to receive signed or unsigned saturation "
6670                      "addition or subtraction node.");
6671   }
6672
6673   unsigned BitWidth = LHS.getScalarValueSizeInBits();
6674   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6675   SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
6676                                LHS, RHS);
6677   SDValue SumDiff = Result.getValue(0);
6678   SDValue Overflow = Result.getValue(1);
6679   SDValue Zero = DAG.getConstant(0, dl, VT);
6680   SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
6681
6682   if (Opcode == ISD::UADDSAT) {
6683     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
6684       // (LHS + RHS) | OverflowMask
6685       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
6686       return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
6687     }
6688     // Overflow ? 0xffff.... : (LHS + RHS)
6689     return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
6690   } else if (Opcode == ISD::USUBSAT) {
6691     if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
6692       // (LHS - RHS) & ~OverflowMask
6693       SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
6694       SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
6695       return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
6696     }
6697     // Overflow ? 0 : (LHS - RHS)
6698     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
6699   } else {
6700     // SatMax -> Overflow && SumDiff < 0
6701     // SatMin -> Overflow && SumDiff >= 0
6702     APInt MinVal = APInt::getSignedMinValue(BitWidth);
6703     APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
6704     SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
6705     SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
6706     SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
6707     Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
6708     return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
6709   }
6710 }
6711
6712 SDValue
6713 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
6714   assert((Node->getOpcode() == ISD::SMULFIX ||
6715           Node->getOpcode() == ISD::UMULFIX ||
6716           Node->getOpcode() == ISD::SMULFIXSAT ||
6717           Node->getOpcode() == ISD::UMULFIXSAT) &&
6718          "Expected a fixed point multiplication opcode");
6719
6720   SDLoc dl(Node);
6721   SDValue LHS = Node->getOperand(0);
6722   SDValue RHS = Node->getOperand(1);
6723   EVT VT = LHS.getValueType();
6724   unsigned Scale = Node->getConstantOperandVal(2);
6725   bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
6726                      Node->getOpcode() == ISD::UMULFIXSAT);
6727   bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
6728                  Node->getOpcode() == ISD::SMULFIXSAT);
6729   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6730   unsigned VTSize = VT.getScalarSizeInBits();
6731
6732   if (!Scale) {
6733     // [us]mul.fix(a, b, 0) -> mul(a, b)
6734     if (!Saturating) {
6735       if (isOperationLegalOrCustom(ISD::MUL, VT))
6736         return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
6737     } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
6738       SDValue Result =
6739           DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
6740       SDValue Product = Result.getValue(0);
6741       SDValue Overflow = Result.getValue(1);
6742       SDValue Zero = DAG.getConstant(0, dl, VT);
6743
6744       APInt MinVal = APInt::getSignedMinValue(VTSize);
6745       APInt MaxVal = APInt::getSignedMaxValue(VTSize);
6746       SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
6747       SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
6748       SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
6749       Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
6750       return DAG.getSelect(dl, VT, Overflow, Result, Product);
6751     } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
6752       SDValue Result =
6753           DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
6754       SDValue Product = Result.getValue(0);
6755       SDValue Overflow = Result.getValue(1);
6756
6757       APInt MaxVal = APInt::getMaxValue(VTSize);
6758       SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
6759       return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
6760     }
6761   }
6762
6763   assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
6764          "Expected scale to be less than the number of bits if signed or at "
6765          "most the number of bits if unsigned.");
6766   assert(LHS.getValueType() == RHS.getValueType() &&
6767          "Expected both operands to be the same type");
6768
6769   // Get the upper and lower bits of the result.
6770   SDValue Lo, Hi;
6771   unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
6772   unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
6773   if (isOperationLegalOrCustom(LoHiOp, VT)) {
6774     SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
6775     Lo = Result.getValue(0);
6776     Hi = Result.getValue(1);
6777   } else if (isOperationLegalOrCustom(HiOp, VT)) {
6778     Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
6779     Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
6780   } else if (VT.isVector()) {
6781     return SDValue();
6782   } else {
6783     report_fatal_error("Unable to expand fixed point multiplication.");
6784   }
6785
6786   if (Scale == VTSize)
6787     // Result is just the top half since we'd be shifting by the width of the
6788     // operand. Overflow impossible so this works for both UMULFIX and
6789     // UMULFIXSAT.
6790     return Hi;
6791
6792   // The result will need to be shifted right by the scale since both operands
6793   // are scaled. The result is given to us in 2 halves, so we only want part of
6794   // both in the result.
6795   EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
6796   SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
6797                                DAG.getConstant(Scale, dl, ShiftTy));
6798   if (!Saturating)
6799     return Result;
6800
6801   if (!Signed) {
6802     // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
6803     // widened multiplication) aren't all zeroes.
6804
6805     // Saturate to max if ((Hi >> Scale) != 0),
6806     // which is the same as if (Hi > ((1 << Scale) - 1))
6807     APInt MaxVal = APInt::getMaxValue(VTSize);
6808     SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
6809                                       dl, VT);
6810     Result = DAG.getSelectCC(dl, Hi, LowMask,
6811                              DAG.getConstant(MaxVal, dl, VT), Result,
6812                              ISD::SETUGT);
6813
6814     return Result;
6815   }
6816
6817   // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
6818   // widened multiplication) aren't all ones or all zeroes.
6819
6820   SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
6821   SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
6822
6823   if (Scale == 0) {
6824     SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
6825                                DAG.getConstant(VTSize - 1, dl, ShiftTy));
6826     SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
6827     // Saturated to SatMin if wide product is negative, and SatMax if wide
6828     // product is positive ...
6829     SDValue Zero = DAG.getConstant(0, dl, VT);
6830     SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
6831                                                ISD::SETLT);
6832     // ... but only if we overflowed.
6833     return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
6834   }
6835
6836   //  We handled Scale==0 above so all the bits to examine is in Hi.
6837
6838   // Saturate to max if ((Hi >> (Scale - 1)) > 0),
6839   // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
6840   SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
6841                                     dl, VT);
6842   Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
6843   // Saturate to min if (Hi >> (Scale - 1)) < -1),
6844   // which is the same as if (HI < (-1 << (Scale - 1))
6845   SDValue HighMask =
6846       DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
6847                       dl, VT);
6848   Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
6849   return Result;
6850 }
6851
6852 void TargetLowering::expandUADDSUBO(
6853     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
6854   SDLoc dl(Node);
6855   SDValue LHS = Node->getOperand(0);
6856   SDValue RHS = Node->getOperand(1);
6857   bool IsAdd = Node->getOpcode() == ISD::UADDO;
6858
6859   // If ADD/SUBCARRY is legal, use that instead.
6860   unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
6861   if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
6862     SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
6863     SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
6864                                     { LHS, RHS, CarryIn });
6865     Result = SDValue(NodeCarry.getNode(), 0);
6866     Overflow = SDValue(NodeCarry.getNode(), 1);
6867     return;
6868   }
6869
6870   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
6871                             LHS.getValueType(), LHS, RHS);
6872
6873   EVT ResultType = Node->getValueType(1);
6874   EVT SetCCType = getSetCCResultType(
6875       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
6876   ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
6877   SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
6878   Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
6879 }
6880
6881 void TargetLowering::expandSADDSUBO(
6882     SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
6883   SDLoc dl(Node);
6884   SDValue LHS = Node->getOperand(0);
6885   SDValue RHS = Node->getOperand(1);
6886   bool IsAdd = Node->getOpcode() == ISD::SADDO;
6887
6888   Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
6889                             LHS.getValueType(), LHS, RHS);
6890
6891   EVT ResultType = Node->getValueType(1);
6892   EVT OType = getSetCCResultType(
6893       DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
6894
6895   // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
6896   unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
6897   if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
6898     SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
6899     SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
6900     Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
6901     return;
6902   }
6903
6904   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
6905
6906   //   LHSSign -> LHS >= 0
6907   //   RHSSign -> RHS >= 0
6908   //   SumSign -> Result >= 0
6909   //
6910   //   Add:
6911   //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
6912   //   Sub:
6913   //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
6914   SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
6915   SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
6916   SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
6917                                     IsAdd ? ISD::SETEQ : ISD::SETNE);
6918
6919   SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
6920   SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
6921
6922   SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
6923   Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
6924 }
6925
6926 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
6927                                 SDValue &Overflow, SelectionDAG &DAG) const {
6928   SDLoc dl(Node);
6929   EVT VT = Node->getValueType(0);
6930   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6931   SDValue LHS = Node->getOperand(0);
6932   SDValue RHS = Node->getOperand(1);
6933   bool isSigned = Node->getOpcode() == ISD::SMULO;
6934
6935   // For power-of-two multiplications we can use a simpler shift expansion.
6936   if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
6937     const APInt &C = RHSC->getAPIntValue();
6938     // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
6939     if (C.isPowerOf2()) {
6940       // smulo(x, signed_min) is same as umulo(x, signed_min).
6941       bool UseArithShift = isSigned && !C.isMinSignedValue();
6942       EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
6943       SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
6944       Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
6945       Overflow = DAG.getSetCC(dl, SetCCVT,
6946           DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
6947                       dl, VT, Result, ShiftAmt),
6948           LHS, ISD::SETNE);
6949       return true;
6950     }
6951   }
6952
6953   EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
6954   if (VT.isVector())
6955     WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6956                               VT.getVectorNumElements());
6957
6958   SDValue BottomHalf;
6959   SDValue TopHalf;
6960   static const unsigned Ops[2][3] =
6961       { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
6962         { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
6963   if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
6964     BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
6965     TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
6966   } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
6967     BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
6968                              RHS);
6969     TopHalf = BottomHalf.getValue(1);
6970   } else if (isTypeLegal(WideVT)) {
6971     LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
6972     RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
6973     SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
6974     BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
6975     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
6976         getShiftAmountTy(WideVT, DAG.getDataLayout()));
6977     TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
6978                           DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
6979   } else {
6980     if (VT.isVector())
6981       return false;
6982
6983     // We can fall back to a libcall with an illegal type for the MUL if we
6984     // have a libcall big enough.
6985     // Also, we can fall back to a division in some cases, but that's a big
6986     // performance hit in the general case.
6987     RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
6988     if (WideVT == MVT::i16)
6989       LC = RTLIB::MUL_I16;
6990     else if (WideVT == MVT::i32)
6991       LC = RTLIB::MUL_I32;
6992     else if (WideVT == MVT::i64)
6993       LC = RTLIB::MUL_I64;
6994     else if (WideVT == MVT::i128)
6995       LC = RTLIB::MUL_I128;
6996     assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
6997
6998     SDValue HiLHS;
6999     SDValue HiRHS;
7000     if (isSigned) {
7001       // The high part is obtained by SRA'ing all but one of the bits of low
7002       // part.
7003       unsigned LoSize = VT.getSizeInBits();
7004       HiLHS =
7005           DAG.getNode(ISD::SRA, dl, VT, LHS,
7006                       DAG.getConstant(LoSize - 1, dl,
7007                                       getPointerTy(DAG.getDataLayout())));
7008       HiRHS =
7009           DAG.getNode(ISD::SRA, dl, VT, RHS,
7010                       DAG.getConstant(LoSize - 1, dl,
7011                                       getPointerTy(DAG.getDataLayout())));
7012     } else {
7013         HiLHS = DAG.getConstant(0, dl, VT);
7014         HiRHS = DAG.getConstant(0, dl, VT);
7015     }
7016
7017     // Here we're passing the 2 arguments explicitly as 4 arguments that are
7018     // pre-lowered to the correct types. This all depends upon WideVT not
7019     // being a legal type for the architecture and thus has to be split to
7020     // two arguments.
7021     SDValue Ret;
7022     TargetLowering::MakeLibCallOptions CallOptions;
7023     CallOptions.setSExt(isSigned);
7024     CallOptions.setIsPostTypeLegalization(true);
7025     if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
7026       // Halves of WideVT are packed into registers in different order
7027       // depending on platform endianness. This is usually handled by
7028       // the C calling convention, but we can't defer to it in
7029       // the legalizer.
7030       SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
7031       Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
7032     } else {
7033       SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
7034       Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
7035     }
7036     assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
7037            "Ret value is a collection of constituent nodes holding result.");
7038     if (DAG.getDataLayout().isLittleEndian()) {
7039       // Same as above.
7040       BottomHalf = Ret.getOperand(0);
7041       TopHalf = Ret.getOperand(1);
7042     } else {
7043       BottomHalf = Ret.getOperand(1);
7044       TopHalf = Ret.getOperand(0);
7045     }
7046   }
7047
7048   Result = BottomHalf;
7049   if (isSigned) {
7050     SDValue ShiftAmt = DAG.getConstant(
7051         VT.getScalarSizeInBits() - 1, dl,
7052         getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
7053     SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
7054     Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
7055   } else {
7056     Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
7057                             DAG.getConstant(0, dl, VT), ISD::SETNE);
7058   }
7059
7060   // Truncate the result if SetCC returns a larger type than needed.
7061   EVT RType = Node->getValueType(1);
7062   if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
7063     Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
7064
7065   assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
7066          "Unexpected result type for S/UMULO legalization");
7067   return true;
7068 }
7069
7070 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
7071   SDLoc dl(Node);
7072   bool NoNaN = Node->getFlags().hasNoNaNs();
7073   unsigned BaseOpcode = 0;
7074   switch (Node->getOpcode()) {
7075   default: llvm_unreachable("Expected VECREDUCE opcode");
7076   case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
7077   case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
7078   case ISD::VECREDUCE_ADD:  BaseOpcode = ISD::ADD; break;
7079   case ISD::VECREDUCE_MUL:  BaseOpcode = ISD::MUL; break;
7080   case ISD::VECREDUCE_AND:  BaseOpcode = ISD::AND; break;
7081   case ISD::VECREDUCE_OR:   BaseOpcode = ISD::OR; break;
7082   case ISD::VECREDUCE_XOR:  BaseOpcode = ISD::XOR; break;
7083   case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
7084   case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
7085   case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
7086   case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
7087   case ISD::VECREDUCE_FMAX:
7088     BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
7089     break;
7090   case ISD::VECREDUCE_FMIN:
7091     BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
7092     break;
7093   }
7094
7095   SDValue Op = Node->getOperand(0);
7096   EVT VT = Op.getValueType();
7097
7098   // Try to use a shuffle reduction for power of two vectors.
7099   if (VT.isPow2VectorType()) {
7100     while (VT.getVectorNumElements() > 1) {
7101       EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
7102       if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
7103         break;
7104
7105       SDValue Lo, Hi;
7106       std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
7107       Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
7108       VT = HalfVT;
7109     }
7110   }
7111
7112   EVT EltVT = VT.getVectorElementType();
7113   unsigned NumElts = VT.getVectorNumElements();
7114
7115   SmallVector<SDValue, 8> Ops;
7116   DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
7117
7118   SDValue Res = Ops[0];
7119   for (unsigned i = 1; i < NumElts; i++)
7120     Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
7121
7122   // Result type may be wider than element type.
7123   if (EltVT != Node->getValueType(0))
7124     Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
7125   return Res;
7126 }