lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  11 // both before and after the DAG is legalized.
  12 //
  13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  14 // primarily intended to handle simplification opportunities that are implicit
  15 // in the LLVM IR and exposed by the various codegen lowering phases.
  16 //
  17 //===----------------------------------------------------------------------===//
  18
  19 #include "llvm/ADT/APFloat.h"
  20 #include "llvm/ADT/APInt.h"
  21 #include "llvm/ADT/ArrayRef.h"
  22 #include "llvm/ADT/DenseMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/CodeGen/DAGCombine.h"
  35 #include "llvm/CodeGen/ISDOpcodes.h"
  36 #include "llvm/CodeGen/MachineFrameInfo.h"
  37 #include "llvm/CodeGen/MachineFunction.h"
  38 #include "llvm/CodeGen/MachineMemOperand.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/SelectionDAG.h"
  41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  42 #include "llvm/CodeGen/SelectionDAGNodes.h"
  43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  44 #include "llvm/CodeGen/TargetLowering.h"
  45 #include "llvm/CodeGen/TargetRegisterInfo.h"
  46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  47 #include "llvm/CodeGen/ValueTypes.h"
  48 #include "llvm/IR/Attributes.h"
  49 #include "llvm/IR/Constant.h"
  50 #include "llvm/IR/DataLayout.h"
  51 #include "llvm/IR/DerivedTypes.h"
  52 #include "llvm/IR/Function.h"
  53 #include "llvm/IR/LLVMContext.h"
  54 #include "llvm/IR/Metadata.h"
  55 #include "llvm/Support/Casting.h"
  56 #include "llvm/Support/CodeGen.h"
  57 #include "llvm/Support/CommandLine.h"
  58 #include "llvm/Support/Compiler.h"
  59 #include "llvm/Support/Debug.h"
  60 #include "llvm/Support/ErrorHandling.h"
  61 #include "llvm/Support/KnownBits.h"
  62 #include "llvm/Support/MachineValueType.h"
  63 #include "llvm/Support/MathExtras.h"
  64 #include "llvm/Support/raw_ostream.h"
  65 #include "llvm/Target/TargetMachine.h"
  66 #include "llvm/Target/TargetOptions.h"
  67 #include <algorithm>
  68 #include <cassert>
  69 #include <cstdint>
  70 #include <functional>
  71 #include <iterator>
  72 #include <string>
  73 #include <tuple>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 #define DEBUG_TYPE "dagcombine"
   79
      // Counters maintained by this file. The string given to each STATISTIC
      // invocation describes what that counter tracks.
   80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
   81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
   82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
   83 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
   84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
   85 STATISTIC(SlicedLoads, "Number of load sliced");
   86
      /// Hidden option "combiner-global-alias-analysis". No explicit cl::init value
      /// is specified for it here.
   87 static cl::opt<bool>
   88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
   89                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
   90
      /// Hidden option "combiner-use-tbaa"; initialized to true.
   91 static cl::opt<bool>
   92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
   93         cl::desc("Enable DAG combiner's use of TBAA"));
   94
   95 #ifndef NDEBUG
      /// Hidden option "combiner-aa-only-func". It is only declared when NDEBUG is
      /// not defined, so any use of it must be guarded the same way.
   96 static cl::opt<std::string>
   97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
   98                    cl::desc("Only use DAG-combiner alias analysis in this"
   99                             " function"));
  100 #endif
  101
  102 /// Hidden option to stress test load slicing, i.e., when this option
  103 /// is enabled, load slicing bypasses most of its profitability guards.
      /// Initialized to false.
  104 static cl::opt<bool>
  105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
  106                   cl::desc("Bypass the profitability model of load slicing"),
  107                   cl::init(false));
  108
      /// Hidden option "combiner-split-load-index"; initialized to true.
  109 static cl::opt<bool>
  110   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
  111                     cl::desc("DAG combiner may split indexing from loads"));
 112
 113 namespace {
  114
        /// The DAG combiner. It holds the worklist of nodes that still need to
        /// be simplified and declares the per-opcode visit* folding routines
        /// together with the helpers they share. Run() processes the worklist.
  115   class DAGCombiner {
          /// The DAG this combiner operates on (bound in the constructor).
  116     SelectionDAG &DAG;
          /// Target lowering information obtained from DAG in the constructor.
  117     const TargetLowering &TLI;
          /// Current combine level; the constructor starts it at
          /// BeforeLegalizeTypes.
  118     CombineLevel Level;
          /// Optimization level handed to the constructor.
  119     CodeGenOpt::Level OptLevel;
          /// Both flags start out false. NOTE(review): presumably updated by
          /// Run() from the requested combine level -- its definition is not
          /// visible here, confirm there.
  120     bool LegalOperations = false;
  121     bool LegalTypes = false;
          /// Set in the constructor from the current function's optForSize().
  122     bool ForCodeSize;
  123
  124     /// Worklist of all of the nodes that need to be simplified.
  125     ///
  126     /// This must behave as a stack -- new nodes to process are pushed onto the
  127     /// back and when processing we pop off of the back.
  128     ///
  129     /// The worklist will not contain duplicates but may contain null entries
  130     /// due to nodes being deleted from the underlying DAG.
  131     SmallVector<SDNode *, 64> Worklist;
  132
  133     /// Mapping from an SDNode to its position on the worklist.
  134     ///
  135     /// This is used to find and remove nodes from the worklist (by nulling
  136     /// them) when they are deleted from the underlying DAG. It relies on
  137     /// stable indices of nodes within the worklist.
  138     DenseMap<SDNode *, unsigned> WorklistMap;
  139
  140     /// Set of nodes which have been combined (at least once).
  141     ///
  142     /// This is used to allow us to reliably add any operands of a DAG node
  143     /// which have not yet been combined to the worklist.
  144     SmallPtrSet<SDNode *, 32> CombinedNodes;
  145
  146     // AA - Used for DAG load/store alias analysis.
  147     AliasAnalysis *AA;
  148
  149     /// When an instruction is simplified, add all users of the instruction to
  150     /// the work lists because they might get more simplified now.
  151     void AddUsersToWorklist(SDNode *N) {
  152       for (SDNode *Node : N->uses())
  153         AddToWorklist(Node);
  154     }
  155
  156     /// Call the node-specific routine that folds each particular type of node.
  157     SDValue visit(SDNode *N);
  158
  159   public:
          /// Bind the combiner to DAG \p D, remember the alias analysis and
          /// optimization level, and record whether the function is optimized
          /// for size. Also computes MaximumLegalStoreInBits as the largest size
          /// in bits among the legal value types other than MVT::Other.
  160     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
  161         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
  162           OptLevel(OL), AA(AA) {
  163       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
  164
  165       MaximumLegalStoreInBits = 0;
  166       for (MVT VT : MVT::all_valuetypes())
  167         if (EVT(VT).isSimple() && VT != MVT::Other &&
  168             TLI.isTypeLegal(EVT(VT)) &&
  169             VT.getSizeInBits() >= MaximumLegalStoreInBits)
  170           MaximumLegalStoreInBits = VT.getSizeInBits();
  171     }
  172
  173     /// Add to the worklist making sure its instance is at the back (next to be
  174     /// processed.)
          ///
          /// NOTE: a node that is already recorded in WorklistMap is not pushed
          /// again; it stays at its existing position in the worklist.
  175     void AddToWorklist(SDNode *N) {
  176       assert(N->getOpcode() != ISD::DELETED_NODE &&
  177              "Deleted Node added to Worklist");
  178
  179       // Skip handle nodes as they can't usefully be combined and confuse the
  180       // zero-use deletion strategy.
  181       if (N->getOpcode() == ISD::HANDLENODE)
  182         return;
  183
  184       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
  185         Worklist.push_back(N);
  186     }
  187
  188     /// Remove all instances of N from the worklist.
          /// N is also erased from CombinedNodes.
  189     void removeFromWorklist(SDNode *N) {
  190       CombinedNodes.erase(N);
  191
  192       auto It = WorklistMap.find(N);
  193       if (It == WorklistMap.end())
  194         return; // Not in the worklist.
  195
  196       // Null out the entry rather than erasing it to avoid a linear operation.
  197       Worklist[It->second] = nullptr;
  198       WorklistMap.erase(It);
  199     }
  200
          /// Remove \p N from the worklist, queue those of its operands that
          /// have a single use or produce several values, then delete \p N from
          /// the DAG.
  201     void deleteAndRecombine(SDNode *N);
  202     bool recursivelyDeleteUnusedNodes(SDNode *N);
  203
  204     /// Replaces all uses of the results of one DAG node with new values.
  205     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
  206                       bool AddTo = true);
  207
  208     /// Replaces all uses of the results of one DAG node with new values.
  209     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
  210       return CombineTo(N, &Res, 1, AddTo);
  211     }
  212
  213     /// Replaces all uses of the results of one DAG node with new values.
  214     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
  215                       bool AddTo = true) {
  216       SDValue To[] = { Res0, Res1 };
  217       return CombineTo(N, To, 2, AddTo);
  218     }
  219
  220     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
  221
  222   private:
          /// Largest size in bits of any legal value type (MVT::Other excluded),
          /// computed once in the constructor.
  223     unsigned MaximumLegalStoreInBits;
  224
  225     /// Check the specified integer node value to see if it can be simplified or
  226     /// if things it uses can be simplified by bit propagation.
  227     /// If so, return true.
  228     bool SimplifyDemandedBits(SDValue Op) {
  229       unsigned BitWidth = Op.getScalarValueSizeInBits();
  230       APInt Demanded = APInt::getAllOnesValue(BitWidth);
  231       return SimplifyDemandedBits(Op, Demanded);
  232     }
  233
  234     /// Check the specified vector node value to see if it can be simplified or
  235     /// if things it uses can be simplified as it only uses some of the
  236     /// elements. If so, return true.
  237     bool SimplifyDemandedVectorElts(SDValue Op) {
  238       unsigned NumElts = Op.getValueType().getVectorNumElements();
  239       APInt Demanded = APInt::getAllOnesValue(NumElts);
  240       return SimplifyDemandedVectorElts(Op, Demanded);
  241     }
  242
  243     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
  244     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
  245                                     bool AssumeSingleUse = false);
  246
  247     bool CombineToPreIndexedLoadStore(SDNode *N);
  248     bool CombineToPostIndexedLoadStore(SDNode *N);
  249     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  250     bool SliceUpLoad(SDNode *N);
  251
  252     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  253     ///   load.
  254     ///
  255     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  256     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  257     /// \param EltNo index of the vector element to load.
  258     /// \param OriginalLoad load that EVE came from to be replaced.
  259     /// \returns EVE on success SDValue() on failure.
  260     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
  261         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
  262     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  263     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  264     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  265     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  266     SDValue PromoteIntBinOp(SDValue Op);
  267     SDValue PromoteIntShiftOp(SDValue Op);
  268     SDValue PromoteExtend(SDValue Op);
  269     bool PromoteLoad(SDValue Op);
  270
  271     /// Call the node-specific routine that knows how to fold each
  272     /// particular type of node. If that doesn't do anything, try the
  273     /// target-specific DAG combines.
  274     SDValue combine(SDNode *N);
  275
  276     // Visitation implementation - Implement dag node combining for different
  277     // node types.  The semantics are as follows:
  278     // Return Value:
  279     //   SDValue.getNode() == 0 - No change was made
  280     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  281     //   otherwise              - N should be replaced by the returned Operand.
  282     //
  283     SDValue visitTokenFactor(SDNode *N);
  284     SDValue visitMERGE_VALUES(SDNode *N);
  285     SDValue visitADD(SDNode *N);
  286     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
  287     SDValue visitSUB(SDNode *N);
  288     SDValue visitADDC(SDNode *N);
  289     SDValue visitUADDO(SDNode *N);
  290     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  291     SDValue visitSUBC(SDNode *N);
  292     SDValue visitUSUBO(SDNode *N);
  293     SDValue visitADDE(SDNode *N);
  294     SDValue visitADDCARRY(SDNode *N);
  295     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  296     SDValue visitSUBE(SDNode *N);
  297     SDValue visitSUBCARRY(SDNode *N);
  298     SDValue visitMUL(SDNode *N);
  299     SDValue useDivRem(SDNode *N);
  300     SDValue visitSDIV(SDNode *N);
  301     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  302     SDValue visitUDIV(SDNode *N);
  303     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  304     SDValue visitREM(SDNode *N);
  305     SDValue visitMULHU(SDNode *N);
  306     SDValue visitMULHS(SDNode *N);
  307     SDValue visitSMUL_LOHI(SDNode *N);
  308     SDValue visitUMUL_LOHI(SDNode *N);
  309     SDValue visitSMULO(SDNode *N);
  310     SDValue visitUMULO(SDNode *N);
  311     SDValue visitIMINMAX(SDNode *N);
  312     SDValue visitAND(SDNode *N);
  313     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  314     SDValue visitOR(SDNode *N);
  315     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  316     SDValue visitXOR(SDNode *N);
  317     SDValue SimplifyVBinOp(SDNode *N);
  318     SDValue visitSHL(SDNode *N);
  319     SDValue visitSRA(SDNode *N);
  320     SDValue visitSRL(SDNode *N);
  321     SDValue visitRotate(SDNode *N);
  322     SDValue visitABS(SDNode *N);
  323     SDValue visitBSWAP(SDNode *N);
  324     SDValue visitBITREVERSE(SDNode *N);
  325     SDValue visitCTLZ(SDNode *N);
  326     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  327     SDValue visitCTTZ(SDNode *N);
  328     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  329     SDValue visitCTPOP(SDNode *N);
  330     SDValue visitSELECT(SDNode *N);
  331     SDValue visitVSELECT(SDNode *N);
  332     SDValue visitSELECT_CC(SDNode *N);
  333     SDValue visitSETCC(SDNode *N);
  334     SDValue visitSETCCCARRY(SDNode *N);
  335     SDValue visitSIGN_EXTEND(SDNode *N);
  336     SDValue visitZERO_EXTEND(SDNode *N);
  337     SDValue visitANY_EXTEND(SDNode *N);
  338     SDValue visitAssertExt(SDNode *N);
  339     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  340     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  341     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  342     SDValue visitTRUNCATE(SDNode *N);
  343     SDValue visitBITCAST(SDNode *N);
  344     SDValue visitBUILD_PAIR(SDNode *N);
  345     SDValue visitFADD(SDNode *N);
  346     SDValue visitFSUB(SDNode *N);
  347     SDValue visitFMUL(SDNode *N);
  348     SDValue visitFMA(SDNode *N);
  349     SDValue visitFDIV(SDNode *N);
  350     SDValue visitFREM(SDNode *N);
  351     SDValue visitFSQRT(SDNode *N);
  352     SDValue visitFCOPYSIGN(SDNode *N);
  353     SDValue visitSINT_TO_FP(SDNode *N);
  354     SDValue visitUINT_TO_FP(SDNode *N);
  355     SDValue visitFP_TO_SINT(SDNode *N);
  356     SDValue visitFP_TO_UINT(SDNode *N);
  357     SDValue visitFP_ROUND(SDNode *N);
  358     SDValue visitFP_ROUND_INREG(SDNode *N);
  359     SDValue visitFP_EXTEND(SDNode *N);
  360     SDValue visitFNEG(SDNode *N);
  361     SDValue visitFABS(SDNode *N);
  362     SDValue visitFCEIL(SDNode *N);
  363     SDValue visitFTRUNC(SDNode *N);
  364     SDValue visitFFLOOR(SDNode *N);
  365     SDValue visitFMINNUM(SDNode *N);
  366     SDValue visitFMAXNUM(SDNode *N);
  367     SDValue visitBRCOND(SDNode *N);
  368     SDValue visitBR_CC(SDNode *N);
  369     SDValue visitLOAD(SDNode *N);
  370
  371     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  372     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
  373
  374     SDValue visitSTORE(SDNode *N);
  375     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  376     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  377     SDValue visitBUILD_VECTOR(SDNode *N);
  378     SDValue visitCONCAT_VECTORS(SDNode *N);
  379     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  380     SDValue visitVECTOR_SHUFFLE(SDNode *N);
  381     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  382     SDValue visitINSERT_SUBVECTOR(SDNode *N);
  383     SDValue visitMLOAD(SDNode *N);
  384     SDValue visitMSTORE(SDNode *N);
  385     SDValue visitMGATHER(SDNode *N);
  386     SDValue visitMSCATTER(SDNode *N);
  387     SDValue visitFP_TO_FP16(SDNode *N);
  388     SDValue visitFP16_TO_FP(SDNode *N);
  389
  390     SDValue visitFADDForFMACombine(SDNode *N);
  391     SDValue visitFSUBForFMACombine(SDNode *N);
  392     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
  393
  394     SDValue XformToShuffleWithZero(SDNode *N);
  395     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
  396                            SDValue N1, SDNodeFlags Flags);
  397
  398     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
  399
  400     SDValue foldSelectOfConstants(SDNode *N);
  401     SDValue foldVSelectOfConstants(SDNode *N);
  402     SDValue foldBinOpIntoSelect(SDNode *BO);
  403     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  404     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
  405     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  406     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
  407                              SDValue N2, SDValue N3, ISD::CondCode CC,
  408                              bool NotExtCompare = false);
  409     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
  410                                    SDValue N2, SDValue N3, ISD::CondCode CC);
  411     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
  412                               const SDLoc &DL);
  413     SDValue unfoldMaskedMerge(SDNode *N);
  414     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  415     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
  416                           const SDLoc &DL, bool foldBooleans);
  417     SDValue rebuildSetCC(SDValue N);
  418
  419     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
  420                            SDValue &CC) const;
  421     bool isOneUseSetCC(SDValue N) const;
  422
  423     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
  424                                          unsigned HiOp);
  425     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  426     SDValue CombineExtLoad(SDNode *N);
  427     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  428     SDValue combineRepeatedFPDivisors(SDNode *N);
  429     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  430     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  431     SDValue BuildSDIV(SDNode *N);
  432     SDValue BuildSDIVPow2(SDNode *N);
  433     SDValue BuildUDIV(SDNode *N);
  434     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  435     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  436     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  437     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  438     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  439     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
  440                                 SDNodeFlags Flags, bool Reciprocal);
  441     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
  442                                 SDNodeFlags Flags, bool Reciprocal);
  443     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
  444                                bool DemandHighBits = true);
  445     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  446     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
  447                               SDValue InnerPos, SDValue InnerNeg,
  448                               unsigned PosOpcode, unsigned NegOpcode,
  449                               const SDLoc &DL);
  450     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  451     SDValue MatchLoadCombine(SDNode *N);
  452     SDValue ReduceLoadWidth(SDNode *N);
  453     SDValue ReduceLoadOpStoreWidth(SDNode *N);
  454     SDValue splitMergedValStore(StoreSDNode *ST);
  455     SDValue TransformFPLoadStorePair(SDNode *N);
  456     SDValue convertBuildVecZextToZext(SDNode *N);
  457     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  458     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
  459     SDValue reduceBuildVecToShuffle(SDNode *N);
  460     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
  461                                   ArrayRef<int> VectorMask, SDValue VecIn1,
  462                                   SDValue VecIn2, unsigned LeftIdx);
  463     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
  464
  465     /// Walk up chain skipping non-aliasing memory nodes,
  466     /// looking for aliasing nodes and adding them to the Aliases vector.
  467     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
  468                           SmallVectorImpl<SDValue> &Aliases);
  469
  470     /// Return true if there is any possibility that the two addresses overlap.
  471     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
  472
  473     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
  474     /// chain (aliasing node.)
  475     SDValue FindBetterChain(SDNode *N, SDValue Chain);
  476
  477     /// Try to replace a store and any possibly adjacent stores on
  478     /// consecutive chains with better chains. Return true only if St is
  479     /// replaced.
  480     ///
  481     /// Notice that other chains may still be replaced even if the function
  482     /// returns false.
  483     bool findBetterNeighborChains(StoreSDNode *St);
  484
  485     /// Holds a pointer to an LSBaseSDNode as well as information on where it
  486     /// is located in a sequence of memory operations connected by a chain.
  487     struct MemOpLink {
  488       // Ptr to the mem node.
  489       LSBaseSDNode *MemNode;
  490
  491       // Offset from the base ptr.
  492       int64_t OffsetFromBase;
  493
  494       MemOpLink(LSBaseSDNode *N, int64_t Offset)
  495           : MemNode(N), OffsetFromBase(Offset) {}
  496     };
  497
  498     /// This is a helper function for visitMUL to check the profitability
  499     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  500     /// MulNode is the original multiply, AddNode is (add x, c1),
  501     /// and ConstNode is c2.
  502     bool isMulAddWithConstProfitable(SDNode *MulNode,
  503                                      SDValue &AddNode,
  504                                      SDValue &ConstNode);
  505
  506     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
  507     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
  508     /// the type of the loaded value to be extended.
  509     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
  510                           EVT LoadResultTy, EVT &ExtVT);
  511
  512     /// Helper function to calculate whether the given Load/Store can have its
  513     /// width reduced to ExtVT.
  514     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
  515                            EVT &MemVT, unsigned ShAmt = 0);
  516
  517     /// Used by BackwardsPropagateMask to find suitable loads.
  518     bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
  519                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
  520                            ConstantSDNode *Mask, SDNode *&NodeToMask);
  521     /// Attempt to propagate a given AND node back to load leaves so that they
  522     /// can be combined into narrow loads.
  523     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
  524
  525     /// Helper function for MergeConsecutiveStores which merges the
  526     /// component store chains.
  527     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
  528                                 unsigned NumStores);
  529
  530     /// This is a helper function for MergeConsecutiveStores. When the
  531     /// source elements of the consecutive stores are all constants or
  532     /// all extracted vector elements, try to merge them into one
  533     /// larger store introducing bitcasts if necessary.  \return True
  534     /// if a merged store was created.
  535     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
  536                                          EVT MemVT, unsigned NumStores,
  537                                          bool IsConstantSrc, bool UseVector,
  538                                          bool UseTrunc);
  539
  540     /// This is a helper function for MergeConsecutiveStores. Stores
  541     /// that potentially may be merged with St are placed in
  542     /// StoreNodes. RootNode is a chain predecessor to all store
  543     /// candidates.
  544     void getStoreMergeCandidates(StoreSDNode *St,
  545                                  SmallVectorImpl<MemOpLink> &StoreNodes,
  546                                  SDNode *&Root);
  547
  548     /// Helper function for MergeConsecutiveStores. Checks if
  549     /// candidate stores have indirect dependency through their
  550     /// operands. RootNode is the predecessor to all stores calculated
  551     /// by getStoreMergeCandidates and is used to prune the dependency check.
  552     /// \return True if safe to merge.
  553     bool checkMergeStoreCandidatesForDependencies(
  554         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
  555         SDNode *RootNode);
  556
  557     /// Merge consecutive store operations into a wide store.
  558     /// This optimization uses wide integers or vectors when possible.
  559     /// NOTE(review): the declared return type is bool, not a store count;
  560     /// presumably true when a merged store was created -- confirm.
  561     bool MergeConsecutiveStores(StoreSDNode *St);
  562
  563     /// Try to transform a truncation where C is a constant:
  564     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
  565     ///
  566     /// \p N needs to be a truncation and its first operand an AND. Other
  567     /// requirements are checked by the function (e.g. that trunc is
  568     /// single-use) and if missed an empty SDValue is returned.
  569     SDValue distributeTruncateThroughAnd(SDNode *N);
  570
  571     /// Helper function to determine whether the target supports operation
  572     /// given by \p Opcode for type \p VT, that is, whether the operation
  573     /// is legal or custom before legalizing operations, and whether it is
  574     /// legal (but not custom) after legalization.
  575     bool hasOperation(unsigned Opcode, EVT VT) {
  576       if (LegalOperations)
  577         return TLI.isOperationLegal(Opcode, VT);
  578       return TLI.isOperationLegalOrCustom(Opcode, VT);
  579     }
  580
  581   public:
  582     /// Runs the dag combiner on all nodes in the work list
  583     void Run(CombineLevel AtLevel);
  584
          /// Returns the DAG this combiner operates on.
  585     SelectionDAG &getDAG() const { return DAG; }
  586
  587     /// Returns a type large enough to hold any valid shift amount - before type
  588     /// legalization these can be huge.
  589     EVT getShiftAmountTy(EVT LHSTy) {
  590       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
  591       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  592     }
  593
  594     /// This method returns true if we are running before type legalization or
  595     /// if the specified VT is legal.
  596     bool isTypeLegal(const EVT &VT) {
  597       if (!LegalTypes) return true;
  598       return TLI.isTypeLegal(VT);
  599     }
  600
  601     /// Convenience wrapper around TargetLowering::getSetCCResultType
  602     EVT getSetCCResultType(EVT VT) const {
  603       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  604     }
  605
  606     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
  607                          SDValue OrigLoad, SDValue ExtLoad,
  608                          ISD::NodeType ExtType);
  609   };
 610
 611 /// This class is a DAGUpdateListener that removes any deleted
 612 /// nodes from the worklist.
 613 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 614   DAGCombiner &DC;
 615
 616 public:
 617   explicit WorklistRemover(DAGCombiner &dc)
 618     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 619
 620   void NodeDeleted(SDNode *N, SDNode *E) override {
 621     DC.removeFromWorklist(N);
 622   }
 623 };
 624
 625 } // end anonymous namespace
 626
 627 //===----------------------------------------------------------------------===//
 628 //  TargetLowering::DAGCombinerInfo implementation
 629 //===----------------------------------------------------------------------===//
 630
 631 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 632   ((DAGCombiner*)DC)->AddToWorklist(N);
 633 }
 634
 635 SDValue TargetLowering::DAGCombinerInfo::
 636 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 637   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 638 }
 639
 640 SDValue TargetLowering::DAGCombinerInfo::
 641 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 642   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 643 }
 644
 645 SDValue TargetLowering::DAGCombinerInfo::
 646 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 647   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 648 }
 649
 650 void TargetLowering::DAGCombinerInfo::
 651 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 652   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 653 }
 654
 655 //===----------------------------------------------------------------------===//
 656 // Helper Functions
 657 //===----------------------------------------------------------------------===//
 658
 659 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 660   removeFromWorklist(N);
 661
 662   // If the operands of this node are only used by the node, they will now be
 663   // dead. Make sure to re-visit them and recursively delete dead nodes.
 664   for (const SDValue &Op : N->ops())
 665     // For an operand generating multiple values, one of the values may
 666     // become dead allowing further simplification (e.g. split index
 667     // arithmetic from an indexed load).
 668     if (Op->hasOneUse() || Op->getNumValues() > 1)
 669       AddToWorklist(Op.getNode());
 670
 671   DAG.DeleteNode(N);
 672 }
 673
 674 /// Return 1 if we can compute the negated form of the specified expression for
 675 /// the same cost as the expression itself, or 2 if we can compute the negated
 676 /// form more cheaply than the expression itself.
 677 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 678                                const TargetLowering &TLI,
 679                                const TargetOptions *Options,
 680                                unsigned Depth = 0) {
 681   // fneg is removable even if it has multiple uses.
 682   if (Op.getOpcode() == ISD::FNEG) return 2;
 683
 684   // Don't allow anything with multiple uses unless we know it is free.
 685   EVT VT = Op.getValueType();
 686   const SDNodeFlags Flags = Op->getFlags();
 687   if (!Op.hasOneUse())
 688     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
 689           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
 690       return 0;
 691
 692   // Don't recurse exponentially.
 693   if (Depth > 6) return 0;
 694
 695   switch (Op.getOpcode()) {
 696   default: return false;
 697   case ISD::ConstantFP: {
 698     if (!LegalOperations)
 699       return 1;
 700
 701     // Don't invert constant FP values after legalization unless the target says
 702     // the negated constant is legal.
 703     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 704       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 705   }
 706   case ISD::FADD:
 707     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
 708       return 0;
 709
 710     // After operation legalization, it might not be legal to create new FSUBs.
 711     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
 712       return 0;
 713
 714     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 715     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 716                                     Options, Depth + 1))
 717       return V;
 718     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 719     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 720                               Depth + 1);
 721   case ISD::FSUB:
 722     // We can't turn -(A-B) into B-A when we honor signed zeros.
 723     if (!Options->NoSignedZerosFPMath &&
 724         !Flags.hasNoSignedZeros())
 725       return 0;
 726
 727     // fold (fneg (fsub A, B)) -> (fsub B, A)
 728     return 1;
 729
 730   case ISD::FMUL:
 731   case ISD::FDIV:
 732     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 733     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 734                                     Options, Depth + 1))
 735       return V;
 736
 737     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 738                               Depth + 1);
 739
 740   case ISD::FP_EXTEND:
 741   case ISD::FP_ROUND:
 742   case ISD::FSIN:
 743     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 744                               Depth + 1);
 745   }
 746 }
 747
 748 /// If isNegatibleForFree returns true, return the newly negated expression.
 749 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 750                                     bool LegalOperations, unsigned Depth = 0) {
 751   const TargetOptions &Options = DAG.getTarget().Options;
 752   // fneg is removable even if it has multiple uses.
 753   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 754
 755   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 756
 757   const SDNodeFlags Flags = Op.getNode()->getFlags();
 758
 759   switch (Op.getOpcode()) {
 760   default: llvm_unreachable("Unknown code");
 761   case ISD::ConstantFP: {
 762     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 763     V.changeSign();
 764     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 765   }
 766   case ISD::FADD:
 767     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
 768
 769     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 770     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 771                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 772       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 773                          GetNegatedExpression(Op.getOperand(0), DAG,
 774                                               LegalOperations, Depth+1),
 775                          Op.getOperand(1), Flags);
 776     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 777     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 778                        GetNegatedExpression(Op.getOperand(1), DAG,
 779                                             LegalOperations, Depth+1),
 780                        Op.getOperand(0), Flags);
 781   case ISD::FSUB:
 782     // fold (fneg (fsub 0, B)) -> B
 783     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 784       if (N0CFP->isZero())
 785         return Op.getOperand(1);
 786
 787     // fold (fneg (fsub A, B)) -> (fsub B, A)
 788     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 789                        Op.getOperand(1), Op.getOperand(0), Flags);
 790
 791   case ISD::FMUL:
 792   case ISD::FDIV:
 793     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 794     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 795                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 796       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 797                          GetNegatedExpression(Op.getOperand(0), DAG,
 798                                               LegalOperations, Depth+1),
 799                          Op.getOperand(1), Flags);
 800
 801     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 802     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 803                        Op.getOperand(0),
 804                        GetNegatedExpression(Op.getOperand(1), DAG,
 805                                             LegalOperations, Depth+1), Flags);
 806
 807   case ISD::FP_EXTEND:
 808   case ISD::FSIN:
 809     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 810                        GetNegatedExpression(Op.getOperand(0), DAG,
 811                                             LegalOperations, Depth+1));
 812   case ISD::FP_ROUND:
 813       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 814                          GetNegatedExpression(Op.getOperand(0), DAG,
 815                                               LegalOperations, Depth+1),
 816                          Op.getOperand(1));
 817   }
 818 }
 819
 820 // APInts must be the same size for most operations, this helper
 821 // function zero extends the shorter of the pair so that they match.
 822 // We provide an Offset so that we can create bitwidths that won't overflow.
 823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 824   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 825   LHS = LHS.zextOrSelf(Bits);
 826   RHS = RHS.zextOrSelf(Bits);
 827 }
 828
 829 // Return true if this node is a setcc, or is a select_cc
 830 // that selects between the target values used for true and false, making it
 831 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 832 // the appropriate nodes based on the type of node we are checking. This
 833 // simplifies life a bit for the callers.
 834 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 835                                     SDValue &CC) const {
 836   if (N.getOpcode() == ISD::SETCC) {
 837     LHS = N.getOperand(0);
 838     RHS = N.getOperand(1);
 839     CC  = N.getOperand(2);
 840     return true;
 841   }
 842
 843   if (N.getOpcode() != ISD::SELECT_CC ||
 844       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 845       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 846     return false;
 847
 848   if (TLI.getBooleanContents(N.getValueType()) ==
 849       TargetLowering::UndefinedBooleanContent)
 850     return false;
 851
 852   LHS = N.getOperand(0);
 853   RHS = N.getOperand(1);
 854   CC  = N.getOperand(4);
 855   return true;
 856 }
 857
 858 /// Return true if this is a SetCC-equivalent operation with only one use.
 859 /// If this is true, it allows the users to invert the operation for free when
 860 /// it is profitable to do so.
 861 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 862   SDValue N0, N1, N2;
 863   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 864     return true;
 865   return false;
 866 }
 867
 868 static SDValue peekThroughBitcast(SDValue V) {
 869   while (V.getOpcode() == ISD::BITCAST)
 870     V = V.getOperand(0);
 871   return V;
 872 }
 873
 874 // Returns the SDNode if it is a constant float BuildVector
 875 // or constant float.
 876 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 877   if (isa<ConstantFPSDNode>(N))
 878     return N.getNode();
 879   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 880     return N.getNode();
 881   return nullptr;
 882 }
 883
 884 // Determines if it is a constant integer or a build vector of constant
 885 // integers (and undefs).
 886 // Do not permit build vector implicit truncation.
 887 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 888   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 889     return !(Const->isOpaque() && NoOpaques);
 890   if (N.getOpcode() != ISD::BUILD_VECTOR)
 891     return false;
 892   unsigned BitWidth = N.getScalarValueSizeInBits();
 893   for (const SDValue &Op : N->op_values()) {
 894     if (Op.isUndef())
 895       continue;
 896     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 897     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 898         (Const->isOpaque() && NoOpaques))
 899       return false;
 900   }
 901   return true;
 902 }
 903
 904 // Determines if it is a constant null integer or a splatted vector of a
 905 // constant null integer (with no undefs).
 906 // Build vector implicit truncation is not an issue for null values.
 907 static bool isNullConstantOrNullSplatConstant(SDValue N) {
 908   // TODO: may want to use peekThroughBitcast() here.
 909   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 910     return Splat->isNullValue();
 911   return false;
 912 }
 913
 914 // Determines if it is a constant integer of one or a splatted vector of a
 915 // constant integer of one (with no undefs).
 916 // Do not permit build vector implicit truncation.
 917 static bool isOneConstantOrOneSplatConstant(SDValue N) {
 918   // TODO: may want to use peekThroughBitcast() here.
 919   unsigned BitWidth = N.getScalarValueSizeInBits();
 920   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 921     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
 922   return false;
 923 }
 924
 925 // Determines if it is a constant integer of all ones or a splatted vector of a
 926 // constant integer of all ones (with no undefs).
 927 // Do not permit build vector implicit truncation.
 928 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
 929   N = peekThroughBitcast(N);
 930   unsigned BitWidth = N.getScalarValueSizeInBits();
 931   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
 932     return Splat->isAllOnesValue() &&
 933            Splat->getAPIntValue().getBitWidth() == BitWidth;
 934   return false;
 935 }
 936
 937 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 938 // undef's.
 939 static bool isAnyConstantBuildVector(const SDNode *N) {
 940   return ISD::isBuildVectorOfConstantSDNodes(N) ||
 941          ISD::isBuildVectorOfConstantFPSDNodes(N);
 942 }
 943
 944 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 945                                     SDValue N1, SDNodeFlags Flags) {
 946   // Don't reassociate reductions.
 947   if (Flags.hasVectorReduction())
 948     return SDValue();
 949
 950   EVT VT = N0.getValueType();
 951   if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
 952     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 953       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 954         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 955         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 956           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 957         return SDValue();
 958       }
 959       if (N0.hasOneUse()) {
 960         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 961         // use
 962         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 963         if (!OpNode.getNode())
 964           return SDValue();
 965         AddToWorklist(OpNode.getNode());
 966         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 967       }
 968     }
 969   }
 970
 971   if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
 972     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 973       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 974         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 975         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 976           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 977         return SDValue();
 978       }
 979       if (N1.hasOneUse()) {
 980         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 981         // use
 982         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
 983         if (!OpNode.getNode())
 984           return SDValue();
 985         AddToWorklist(OpNode.getNode());
 986         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
 987       }
 988     }
 989   }
 990
 991   return SDValue();
 992 }
 993
 994 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 995                                bool AddTo) {
 996   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 997   ++NodesCombined;
 998   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
 999              To[0].getNode()->dump(&DAG);
1000              dbgs() << " and " << NumTo - 1 << " other values\n");
1001   for (unsigned i = 0, e = NumTo; i != e; ++i)
1002     assert((!To[i].getNode() ||
1003             N->getValueType(i) == To[i].getValueType()) &&
1004            "Cannot combine value to value of different type!");
1005
1006   WorklistRemover DeadNodes(*this);
1007   DAG.ReplaceAllUsesWith(N, To);
1008   if (AddTo) {
1009     // Push the new nodes and any users onto the worklist
1010     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1011       if (To[i].getNode()) {
1012         AddToWorklist(To[i].getNode());
1013         AddUsersToWorklist(To[i].getNode());
1014       }
1015     }
1016   }
1017
1018   // Finally, if the node is now dead, remove it from the graph.  The node
1019   // may not be dead if the replacement process recursively simplified to
1020   // something else needing this node.
1021   if (N->use_empty())
1022     deleteAndRecombine(N);
1023   return SDValue(N, 0);
1024 }
1025
1026 void DAGCombiner::
1027 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1028   // Replace all uses.  If any nodes become isomorphic to other nodes and
1029   // are deleted, make sure to remove them from our worklist.
1030   WorklistRemover DeadNodes(*this);
1031   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1032
1033   // Push the new node and any (possibly new) users onto the worklist.
1034   AddToWorklist(TLO.New.getNode());
1035   AddUsersToWorklist(TLO.New.getNode());
1036
1037   // Finally, if the node is now dead, remove it from the graph.  The node
1038   // may not be dead if the replacement process recursively simplified to
1039   // something else needing this node.
1040   if (TLO.Old.getNode()->use_empty())
1041     deleteAndRecombine(TLO.Old.getNode());
1042 }
1043
1044 /// Check the specified integer node value to see if it can be simplified or if
1045 /// things it uses can be simplified by bit propagation. If so, return true.
1046 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1047   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1048   KnownBits Known;
1049   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1050     return false;
1051
1052   // Revisit the node.
1053   AddToWorklist(Op.getNode());
1054
1055   // Replace the old value with the new one.
1056   ++NodesCombined;
1057   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1058              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1059              dbgs() << '\n');
1060
1061   CommitTargetLoweringOpt(TLO);
1062   return true;
1063 }
1064
1065 /// Check the specified vector node value to see if it can be simplified or
1066 /// if things it uses can be simplified as it only uses some of the elements.
1067 /// If so, return true.
1068 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1069                                              bool AssumeSingleUse) {
1070   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1071   APInt KnownUndef, KnownZero;
1072   if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1073                                       0, AssumeSingleUse))
1074     return false;
1075
1076   // Revisit the node.
1077   AddToWorklist(Op.getNode());
1078
1079   // Replace the old value with the new one.
1080   ++NodesCombined;
1081   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1082              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1083              dbgs() << '\n');
1084
1085   CommitTargetLoweringOpt(TLO);
1086   return true;
1087 }
1088
1089 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1090   SDLoc DL(Load);
1091   EVT VT = Load->getValueType(0);
1092   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1093
1094   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1095              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1096   WorklistRemover DeadNodes(*this);
1097   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1098   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1099   deleteAndRecombine(Load);
1100   AddToWorklist(Trunc.getNode());
1101 }
1102
1103 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1104   Replace = false;
1105   SDLoc DL(Op);
1106   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1107     LoadSDNode *LD = cast<LoadSDNode>(Op);
1108     EVT MemVT = LD->getMemoryVT();
1109     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1110                                                       : LD->getExtensionType();
1111     Replace = true;
1112     return DAG.getExtLoad(ExtType, DL, PVT,
1113                           LD->getChain(), LD->getBasePtr(),
1114                           MemVT, LD->getMemOperand());
1115   }
1116
1117   unsigned Opc = Op.getOpcode();
1118   switch (Opc) {
1119   default: break;
1120   case ISD::AssertSext:
1121     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1122       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1123     break;
1124   case ISD::AssertZext:
1125     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1126       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1127     break;
1128   case ISD::Constant: {
1129     unsigned ExtOpc =
1130       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1131     return DAG.getNode(ExtOpc, DL, PVT, Op);
1132   }
1133   }
1134
1135   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1136     return SDValue();
1137   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1138 }
1139
1140 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1141   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1142     return SDValue();
1143   EVT OldVT = Op.getValueType();
1144   SDLoc DL(Op);
1145   bool Replace = false;
1146   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1147   if (!NewOp.getNode())
1148     return SDValue();
1149   AddToWorklist(NewOp.getNode());
1150
1151   if (Replace)
1152     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1153   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1154                      DAG.getValueType(OldVT));
1155 }
1156
1157 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1158   EVT OldVT = Op.getValueType();
1159   SDLoc DL(Op);
1160   bool Replace = false;
1161   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1162   if (!NewOp.getNode())
1163     return SDValue();
1164   AddToWorklist(NewOp.getNode());
1165
1166   if (Replace)
1167     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1168   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1169 }
1170
1171 /// Promote the specified integer binary operation if the target indicates it is
1172 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1173 /// i32 since i16 instructions are longer.
1174 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1175   if (!LegalOperations)
1176     return SDValue();
1177
1178   EVT VT = Op.getValueType();
1179   if (VT.isVector() || !VT.isInteger())
1180     return SDValue();
1181
1182   // If operation type is 'undesirable', e.g. i16 on x86, consider
1183   // promoting it.
1184   unsigned Opc = Op.getOpcode();
1185   if (TLI.isTypeDesirableForOp(Opc, VT))
1186     return SDValue();
1187
1188   EVT PVT = VT;
1189   // Consult target whether it is a good idea to promote this operation and
1190   // what's the right type to promote it to.
1191   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1192     assert(PVT != VT && "Don't know what type to promote to!");
1193
1194     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1195
1196     bool Replace0 = false;
1197     SDValue N0 = Op.getOperand(0);
1198     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1199
1200     bool Replace1 = false;
1201     SDValue N1 = Op.getOperand(1);
1202     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1203     SDLoc DL(Op);
1204
1205     SDValue RV =
1206         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1207
1208     // We are always replacing N0/N1's use in N and only need
1209     // additional replacements if there are additional uses.
1210     Replace0 &= !N0->hasOneUse();
1211     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1212
1213     // Combine Op here so it is preserved past replacements.
1214     CombineTo(Op.getNode(), RV);
1215
1216     // If operands have a use ordering, make sure we deal with
1217     // predecessor first.
1218     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1219       std::swap(N0, N1);
1220       std::swap(NN0, NN1);
1221     }
1222
1223     if (Replace0) {
1224       AddToWorklist(NN0.getNode());
1225       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1226     }
1227     if (Replace1) {
1228       AddToWorklist(NN1.getNode());
1229       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1230     }
1231     return Op;
1232   }
1233   return SDValue();
1234 }
1235
1236 /// Promote the specified integer shift operation if the target indicates it is
1237 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1238 /// i32 since i16 instructions are longer.
1239 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1240   if (!LegalOperations)
1241     return SDValue();
1242
1243   EVT VT = Op.getValueType();
1244   if (VT.isVector() || !VT.isInteger())
1245     return SDValue();
1246
1247   // If operation type is 'undesirable', e.g. i16 on x86, consider
1248   // promoting it.
1249   unsigned Opc = Op.getOpcode();
1250   if (TLI.isTypeDesirableForOp(Opc, VT))
1251     return SDValue();
1252
1253   EVT PVT = VT;
1254   // Consult target whether it is a good idea to promote this operation and
1255   // what's the right type to promote it to.
1256   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1257     assert(PVT != VT && "Don't know what type to promote to!");
1258
1259     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1260
1261     bool Replace = false;
1262     SDValue N0 = Op.getOperand(0);
1263     SDValue N1 = Op.getOperand(1);
1264     if (Opc == ISD::SRA)
1265       N0 = SExtPromoteOperand(N0, PVT);
1266     else if (Opc == ISD::SRL)
1267       N0 = ZExtPromoteOperand(N0, PVT);
1268     else
1269       N0 = PromoteOperand(N0, PVT, Replace);
1270
1271     if (!N0.getNode())
1272       return SDValue();
1273
1274     SDLoc DL(Op);
1275     SDValue RV =
1276         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1277
1278     AddToWorklist(N0.getNode());
1279     if (Replace)
1280       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1281
1282     // Deal with Op being deleted.
1283     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1284       return RV;
1285   }
1286   return SDValue();
1287 }
1288
1289 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1290   if (!LegalOperations)
1291     return SDValue();
1292
1293   EVT VT = Op.getValueType();
1294   if (VT.isVector() || !VT.isInteger())
1295     return SDValue();
1296
1297   // If operation type is 'undesirable', e.g. i16 on x86, consider
1298   // promoting it.
1299   unsigned Opc = Op.getOpcode();
1300   if (TLI.isTypeDesirableForOp(Opc, VT))
1301     return SDValue();
1302
1303   EVT PVT = VT;
1304   // Consult target whether it is a good idea to promote this operation and
1305   // what's the right type to promote it to.
1306   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1307     assert(PVT != VT && "Don't know what type to promote to!");
1308     // fold (aext (aext x)) -> (aext x)
1309     // fold (aext (zext x)) -> (zext x)
1310     // fold (aext (sext x)) -> (sext x)
1311     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1312     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1313   }
1314   return SDValue();
1315 }
1316
1317 bool DAGCombiner::PromoteLoad(SDValue Op) {
1318   if (!LegalOperations)
1319     return false;
1320
1321   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1322     return false;
1323
1324   EVT VT = Op.getValueType();
1325   if (VT.isVector() || !VT.isInteger())
1326     return false;
1327
1328   // If operation type is 'undesirable', e.g. i16 on x86, consider
1329   // promoting it.
1330   unsigned Opc = Op.getOpcode();
1331   if (TLI.isTypeDesirableForOp(Opc, VT))
1332     return false;
1333
1334   EVT PVT = VT;
1335   // Consult target whether it is a good idea to promote this operation and
1336   // what's the right type to promote it to.
1337   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1338     assert(PVT != VT && "Don't know what type to promote to!");
1339
1340     SDLoc DL(Op);
1341     SDNode *N = Op.getNode();
1342     LoadSDNode *LD = cast<LoadSDNode>(N);
1343     EVT MemVT = LD->getMemoryVT();
1344     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1345                                                       : LD->getExtensionType();
1346     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1347                                    LD->getChain(), LD->getBasePtr(),
1348                                    MemVT, LD->getMemOperand());
1349     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1350
1351     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1352                Result.getNode()->dump(&DAG); dbgs() << '\n');
1353     WorklistRemover DeadNodes(*this);
1354     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1355     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1356     deleteAndRecombine(N);
1357     AddToWorklist(Result.getNode());
1358     return true;
1359   }
1360   return false;
1361 }
1362
1363 /// Recursively delete a node which has no uses and any operands for
1364 /// which it is the only use.
1365 ///
1366 /// Note that this both deletes the nodes and removes them from the worklist.
1367 /// It also adds any nodes who have had a user deleted to the worklist as they
1368 /// may now have only one use and subject to other combines.
1369 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1370   if (!N->use_empty())
1371     return false;
1372
1373   SmallSetVector<SDNode *, 16> Nodes;
1374   Nodes.insert(N);
1375   do {
1376     N = Nodes.pop_back_val();
1377     if (!N)
1378       continue;
1379
1380     if (N->use_empty()) {
1381       for (const SDValue &ChildN : N->op_values())
1382         Nodes.insert(ChildN.getNode());
1383
1384       removeFromWorklist(N);
1385       DAG.DeleteNode(N);
1386     } else {
1387       AddToWorklist(N);
1388     }
1389   } while (!Nodes.empty());
1390   return true;
1391 }
1392
1393 //===----------------------------------------------------------------------===//
1394 //  Main DAG Combiner implementation
1395 //===----------------------------------------------------------------------===//
1396
1397 void DAGCombiner::Run(CombineLevel AtLevel) {
1398   // set the instance variables, so that the various visit routines may use it.
1399   Level = AtLevel;
1400   LegalOperations = Level >= AfterLegalizeVectorOps;
1401   LegalTypes = Level >= AfterLegalizeTypes;
1402
1403   // Add all the dag nodes to the worklist.
1404   for (SDNode &Node : DAG.allnodes())
1405     AddToWorklist(&Node);
1406
1407   // Create a dummy node (which is not added to allnodes), that adds a reference
1408   // to the root node, preventing it from being deleted, and tracking any
1409   // changes of the root.
1410   HandleSDNode Dummy(DAG.getRoot());
1411
1412   // While the worklist isn't empty, find a node and try to combine it.
1413   while (!WorklistMap.empty()) {
1414     SDNode *N;
1415     // The Worklist holds the SDNodes in order, but it may contain null entries.
1416     do {
1417       N = Worklist.pop_back_val();
1418     } while (!N);
1419
1420     bool GoodWorklistEntry = WorklistMap.erase(N);
1421     (void)GoodWorklistEntry;
1422     assert(GoodWorklistEntry &&
1423            "Found a worklist entry without a corresponding map entry!");
1424
1425     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1426     // N is deleted from the DAG, since they too may now be dead or may have a
1427     // reduced number of uses, allowing other xforms.
1428     if (recursivelyDeleteUnusedNodes(N))
1429       continue;
1430
1431     WorklistRemover DeadNodes(*this);
1432
1433     // If this combine is running after legalizing the DAG, re-legalize any
1434     // nodes pulled off the worklist.
1435     if (Level == AfterLegalizeDAG) {
1436       SmallSetVector<SDNode *, 16> UpdatedNodes;
1437       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1438
1439       for (SDNode *LN : UpdatedNodes) {
1440         AddToWorklist(LN);
1441         AddUsersToWorklist(LN);
1442       }
1443       if (!NIsValid)
1444         continue;
1445     }
1446
1447     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1448
1449     // Add any operands of the new node which have not yet been combined to the
1450     // worklist as well. Because the worklist uniques things already, this
1451     // won't repeatedly process the same operand.
1452     CombinedNodes.insert(N);
1453     for (const SDValue &ChildN : N->op_values())
1454       if (!CombinedNodes.count(ChildN.getNode()))
1455         AddToWorklist(ChildN.getNode());
1456
1457     SDValue RV = combine(N);
1458
1459     if (!RV.getNode())
1460       continue;
1461
1462     ++NodesCombined;
1463
1464     // If we get back the same node we passed in, rather than a new node or
1465     // zero, we know that the node must have defined multiple values and
1466     // CombineTo was used.  Since CombineTo takes care of the worklist
1467     // mechanics for us, we have no work to do in this case.
1468     if (RV.getNode() == N)
1469       continue;
1470
1471     assert(N->getOpcode() != ISD::DELETED_NODE &&
1472            RV.getOpcode() != ISD::DELETED_NODE &&
1473            "Node was deleted but visit returned new node!");
1474
1475     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1476
1477     if (N->getNumValues() == RV.getNode()->getNumValues())
1478       DAG.ReplaceAllUsesWith(N, RV.getNode());
1479     else {
1480       assert(N->getValueType(0) == RV.getValueType() &&
1481              N->getNumValues() == 1 && "Type mismatch");
1482       DAG.ReplaceAllUsesWith(N, &RV);
1483     }
1484
1485     // Push the new node and any users onto the worklist
1486     AddToWorklist(RV.getNode());
1487     AddUsersToWorklist(RV.getNode());
1488
1489     // Finally, if the node is now dead, remove it from the graph.  The node
1490     // may not be dead if the replacement process recursively simplified to
1491     // something else needing this node. This will also take care of adding any
1492     // operands which have lost a user to the worklist.
1493     recursivelyDeleteUnusedNodes(N);
1494   }
1495
1496   // If the root changed (e.g. it was a dead load, update the root).
1497   DAG.setRoot(Dummy.getValue());
1498   DAG.RemoveDeadNodes();
1499 }
1500
1501 SDValue DAGCombiner::visit(SDNode *N) {
       // Generic combine dispatcher: forward N to the visit routine that handles
       // its opcode and return that routine's result. Opcodes without a case
       // leave the switch and yield a null SDValue.
1502   switch (N->getOpcode()) {
1503   default: break;
1504   case ISD::TokenFactor:        return visitTokenFactor(N);
1505   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1506   case ISD::ADD:                return visitADD(N);
1507   case ISD::SUB:                return visitSUB(N);
1508   case ISD::ADDC:               return visitADDC(N);
1509   case ISD::UADDO:              return visitUADDO(N);
1510   case ISD::SUBC:               return visitSUBC(N);
1511   case ISD::USUBO:              return visitUSUBO(N);
1512   case ISD::ADDE:               return visitADDE(N);
1513   case ISD::ADDCARRY:           return visitADDCARRY(N);
1514   case ISD::SUBE:               return visitSUBE(N);
1515   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1516   case ISD::MUL:                return visitMUL(N);
1517   case ISD::SDIV:               return visitSDIV(N);
1518   case ISD::UDIV:               return visitUDIV(N);
       // Signed and unsigned remainder share one visitor.
1519   case ISD::SREM:
1520   case ISD::UREM:               return visitREM(N);
1521   case ISD::MULHU:              return visitMULHU(N);
1522   case ISD::MULHS:              return visitMULHS(N);
1523   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1524   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1525   case ISD::SMULO:              return visitSMULO(N);
1526   case ISD::UMULO:              return visitUMULO(N);
       // All four integer min/max opcodes share one visitor.
1527   case ISD::SMIN:
1528   case ISD::SMAX:
1529   case ISD::UMIN:
1530   case ISD::UMAX:               return visitIMINMAX(N);
1531   case ISD::AND:                return visitAND(N);
1532   case ISD::OR:                 return visitOR(N);
1533   case ISD::XOR:                return visitXOR(N);
1534   case ISD::SHL:                return visitSHL(N);
1535   case ISD::SRA:                return visitSRA(N);
1536   case ISD::SRL:                return visitSRL(N);
       // Both rotate directions share one visitor.
1537   case ISD::ROTR:
1538   case ISD::ROTL:               return visitRotate(N);
1539   case ISD::ABS:                return visitABS(N);
1540   case ISD::BSWAP:              return visitBSWAP(N);
1541   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1542   case ISD::CTLZ:               return visitCTLZ(N);
1543   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1544   case ISD::CTTZ:               return visitCTTZ(N);
1545   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1546   case ISD::CTPOP:              return visitCTPOP(N);
1547   case ISD::SELECT:             return visitSELECT(N);
1548   case ISD::VSELECT:            return visitVSELECT(N);
1549   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1550   case ISD::SETCC:              return visitSETCC(N);
1551   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1552   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1553   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1554   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
       // Both assert-extension opcodes share one visitor.
1555   case ISD::AssertSext:
1556   case ISD::AssertZext:         return visitAssertExt(N);
1557   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1558   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1559   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1560   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1561   case ISD::BITCAST:            return visitBITCAST(N);
1562   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1563   case ISD::FADD:               return visitFADD(N);
1564   case ISD::FSUB:               return visitFSUB(N);
1565   case ISD::FMUL:               return visitFMUL(N);
1566   case ISD::FMA:                return visitFMA(N);
1567   case ISD::FDIV:               return visitFDIV(N);
1568   case ISD::FREM:               return visitFREM(N);
1569   case ISD::FSQRT:              return visitFSQRT(N);
1570   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1571   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1572   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1573   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1574   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1575   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1576   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1577   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1578   case ISD::FNEG:               return visitFNEG(N);
1579   case ISD::FABS:               return visitFABS(N);
1580   case ISD::FFLOOR:             return visitFFLOOR(N);
1581   case ISD::FMINNUM:            return visitFMINNUM(N);
1582   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1583   case ISD::FCEIL:              return visitFCEIL(N);
1584   case ISD::FTRUNC:             return visitFTRUNC(N);
1585   case ISD::BRCOND:             return visitBRCOND(N);
1586   case ISD::BR_CC:              return visitBR_CC(N);
1587   case ISD::LOAD:               return visitLOAD(N);
1588   case ISD::STORE:              return visitSTORE(N);
1589   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1590   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1591   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1592   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1593   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1594   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1595   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1596   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1597   case ISD::MGATHER:            return visitMGATHER(N);
1598   case ISD::MLOAD:              return visitMLOAD(N);
1599   case ISD::MSCATTER:           return visitMSCATTER(N);
1600   case ISD::MSTORE:             return visitMSTORE(N);
1601   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1602   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1603   }
       // No visitor exists for this opcode.
1604   return SDValue();
1605 }
1606
1607 SDValue DAGCombiner::combine(SDNode *N) {
1608   // Start with the generic, opcode-specific combines.
1609   SDValue Result = visit(N);
1610
1611   // If nothing happened, try a target-specific DAG combine, but only for
1612   // target opcodes or opcodes the target registered a combine for.
1613   if (!Result.getNode()) {
1614     assert(N->getOpcode() != ISD::DELETED_NODE &&
1615            "Node was deleted but visit returned NULL!");
1616     const bool AskTarget =
1617         N->getOpcode() >= ISD::BUILTIN_OP_END ||
1618         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode());
1619     if (AskTarget) {
1620       // Expose the DAG combiner to the target combiner impls.
1621       TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this);
1622       Result = TLI.PerformDAGCombine(N, DagCombineInfo);
1623     }
1624   }
1625
1626   // If nothing happened still, try promoting the operation.
1627   if (!Result.getNode()) {
1628     switch (N->getOpcode()) {
1629     case ISD::ADD:
1630     case ISD::SUB:
1631     case ISD::MUL:
1632     case ISD::AND:
1633     case ISD::OR:
1634     case ISD::XOR:
1635       Result = PromoteIntBinOp(SDValue(N, 0));
1636       break;
1637     case ISD::SHL:
1638     case ISD::SRA:
1639     case ISD::SRL:
1640       Result = PromoteIntShiftOp(SDValue(N, 0));
1641       break;
1642     case ISD::SIGN_EXTEND:
1643     case ISD::ZERO_EXTEND:
1644     case ISD::ANY_EXTEND:
1645       Result = PromoteExtend(SDValue(N, 0));
1646       break;
1647     case ISD::LOAD:
1648       // PromoteLoad only reports success; hand back N itself in that case.
1649       if (PromoteLoad(SDValue(N, 0)))
1650         Result = SDValue(N, 0);
1651       break;
1652     default:
1653       break;
1654     }
1655   }
1656
1657   // A commutative, single-result binary node that is still uncombined can be
1658   // replaced by an existing node that has the same operands swapped.
1659   if (Result.getNode() || !TLI.isCommutativeBinOp(N->getOpcode()) ||
1660       N->getNumValues() != 1)
1661     return Result;
1662   SDValue LHS = N->getOperand(0);
1663   SDValue RHS = N->getOperand(1);
1664
1665   // Constant operands are canonicalized to RHS; skip the lookup when the
1666   // operands are identical or only the RHS is a constant.
1667   if (LHS == RHS || (!isa<ConstantSDNode>(LHS) && isa<ConstantSDNode>(RHS)))
1668     return Result;
1669   SDValue Swapped[] = {RHS, LHS};
1670   SDNode *Existing = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
1671                                          Swapped, N->getFlags());
1672   return Existing ? SDValue(Existing, 0) : Result;
1673 }
1674
1675 /// Return the input chain of \p N, i.e. an operand of type MVT::Other, looking
1676 /// at the first, then the last, then the middle operands; null if none.
1677 static SDValue getInputChainForNode(SDNode *N) {
1678   const unsigned NumOps = N->getNumOperands();
1679   if (NumOps == 0)
1680     return SDValue();
1681   // Candidate order: operand 0, the last operand, then 1 .. NumOps-2.
1682   for (unsigned Step = 0; Step != NumOps; ++Step) {
1683     const unsigned Idx = Step == 0 ? 0 : (Step == 1 ? NumOps - 1 : Step - 1);
1684     if (N->getOperand(Idx).getValueType() == MVT::Other)
1685       return N->getOperand(Idx);
1686   }
1687   return SDValue();
1688 }
1689
1690 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
       // Simplify a TokenFactor: flatten single-use nested TokenFactors into N,
       // drop EntryToken and duplicate operands, and prune operands that are
       // already reachable through another operand's chain. Returns the
       // replacement chain, or a null SDValue if nothing changed.

1691   // If N has two operands, where one has an input chain equal to the other,
1692   // the 'other' chain is redundant.
1693   if (N->getNumOperands() == 2) {
1694     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1695       return N->getOperand(0);
1696     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1697       return N->getOperand(1);
1698   }
1699
1700   // Don't simplify token factors if optnone.
1701   if (OptLevel == CodeGenOpt::None)
1702     return SDValue();
1703
1704   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1705   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1706   SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
1707   bool Changed = false;             // If we should replace this token factor.
1708
1709   // Start out with this token factor.
1710   TFs.push_back(N);
1711
1712   // Iterate through token factors.  The TFs grows when new token factors are
1713   // encountered.
1714   for (unsigned i = 0; i < TFs.size(); ++i) {
1715     SDNode *TF = TFs[i];
1716
1717     // Check each of the operands.
1718     for (const SDValue &Op : TF->op_values()) {
1719       switch (Op.getOpcode()) {
1720       case ISD::EntryToken:
1721         // Entry tokens don't need to be added to the list. They are
1722         // redundant.
1723         Changed = true;
1724         break;
1725
1726       case ISD::TokenFactor:
             // Only a nested token factor with a single use that is not queued
             // yet is flattened; any other one is kept as a plain operand.
1727         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1728           // Queue up for processing.
1729           TFs.push_back(Op.getNode());
1730           // Clean up in case the token factor is removed.
1731           AddToWorklist(Op.getNode());
1732           Changed = true;
1733           break;
1734         }
1735         LLVM_FALLTHROUGH;
1736
1737       default:
1738         // Only add if it isn't already in the list.
1739         if (SeenOps.insert(Op.getNode()).second)
1740           Ops.push_back(Op);
1741         else
1742           Changed = true;
1743         break;
1744       }
1745     }
1746   }
1747
1748   // Remove Nodes that are chained to another node in the list. Do so
1749   // by walking up chains breadth-first stopping when we've seen
1750   // another operand. In general we must climb to the EntryNode, but we can exit
1751   // early if we find all remaining work is associated with just one operand as
1752   // no further pruning is possible.
1753
1754   // List of nodes to search through and original Ops from which they originate.
1755   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1756   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1757   SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes queued by the walk.
1758   bool DidPruneOps = false;
1759
       // Seed the walk with one worklist entry per operand.
1760   unsigned NumLeftToConsider = 0;
1761   for (const SDValue &Op : Ops) {
1762     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1763     OpWorkCount.push_back(1);
1764   }
1765
       // Queue chain node Op on behalf of operand number OpNumber; CurIdx is the
       // worklist position currently being processed. NOTE: this lambda shadows
       // the one-argument AddToWorklist called earlier in this function.
1766   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1767     // If this is an Op, we can remove the op from the list. Remark any
1768     // search associated with it as from the current OpNumber.
1769     if (SeenOps.count(Op) != 0) {
1770       Changed = true;
1771       DidPruneOps = true;
           // Linear search for the position of Op within Ops.
1772       unsigned OrigOpNumber = 0;
1773       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1774         OrigOpNumber++;
1775       assert((OrigOpNumber != Ops.size()) &&
1776              "expected to find TokenFactor Operand");
1777       // Re-mark worklist from OrigOpNumber to OpNumber
1778       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1779         if (Worklist[i].second == OrigOpNumber) {
1780           Worklist[i].second = OpNumber;
1781         }
1782       }
           // Transfer the outstanding work of the pruned operand as well.
1783       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1784       OpWorkCount[OrigOpNumber] = 0;
1785       NumLeftToConsider--;
1786     }
1787     // Add if it's a new chain
1788     if (SeenChains.insert(Op).second) {
1789       OpWorkCount[OpNumber]++;
1790       Worklist.push_back(std::make_pair(Op, OpNumber));
1791     }
1792   };
1793
       // The walk is capped at the first 1024 worklist entries (presumably to
       // bound compile time -- TODO confirm).
1794   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1795     // We need to consider at least 2 Ops to be able to prune.
1796     if (NumLeftToConsider <= 1)
1797       break;
1798     auto CurNode = Worklist[i].first;
1799     auto CurOpNumber = Worklist[i].second;
1800     assert((OpWorkCount[CurOpNumber] > 0) &&
1801            "Node should not appear in worklist");
1802     switch (CurNode->getOpcode()) {
1803     case ISD::EntryToken:
1804       // Hitting EntryToken is the only way for the search to terminate
1805       // without hitting another operand's search. Compensate here for the
1806       // decrement that may follow the switch so that this operand is not
1807       // marked as considered.
1808       NumLeftToConsider++;
1809       break;
1810     case ISD::TokenFactor:
1811       for (const SDValue &Op : CurNode->op_values())
1812         AddToWorklist(i, Op.getNode(), CurOpNumber);
1813       break;
1814     case ISD::CopyFromReg:
1815     case ISD::CopyToReg:
           // Operand 0 is followed as the chain of these nodes.
1816       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1817       break;
1818     default:
           // Only memory nodes are followed further; any other node ends this
           // branch of the walk.
1819       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1820         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1821       break;
1822     }
         // This worklist entry is done; an operand without outstanding work no
         // longer needs to be considered.
1823     OpWorkCount[CurOpNumber]--;
1824     if (OpWorkCount[CurOpNumber] == 0)
1825       NumLeftToConsider--;
1826   }
1827
1828   // If we've changed things around then replace token factor.
1829   if (Changed) {
1830     SDValue Result;
1831     if (Ops.empty()) {
1832       // The entry token is the only possible outcome.
1833       Result = DAG.getEntryNode();
1834     } else {
1835       if (DidPruneOps) {
1836         SmallVector<SDValue, 8> PrunedOps;
1837         // Keep only the operands that no chain walk reached.
1838         for (const SDValue &Op : Ops) {
1839           if (SeenChains.count(Op.getNode()) == 0)
1840             PrunedOps.push_back(Op);
1841         }
1842         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1843       } else {
1844         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1845       }
1846     }
1847     return Result;
1848   }
1849   return SDValue();
1850 }
1851
1852 /// MERGE_VALUES can always be eliminated by forwarding results to operands.
1853 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1854   WorklistRemover DeadNodes(*this);
1855   // Queue the users of this node first so that they can be tried again once
1856   // they have new operands.
1857   AddUsersToWorklist(N);
1858   // Replacing results may cause a different MERGE_VALUES to be CSE'd with N and
1859   // carry its uses along, so iterate until N has no uses and is safe to delete.
1860   do {
1861     for (unsigned ResNo = 0, NumOps = N->getNumOperands(); ResNo != NumOps;
1862          ++ResNo)
1863       DAG.ReplaceAllUsesOfValueWith(SDValue(N, ResNo), N->getOperand(ResNo));
1864   } while (!N->use_empty());
1865   deleteAndRecombine(N);
1866   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1867 }
1868
1869 /// Return \p N as a ConstantSDNode if it is one and is not opaque; return
1870 /// nullptr for opaque constants and for every other kind of node.
1871 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1872   auto *C = dyn_cast<ConstantSDNode>(N);
1873   return (C && !C->isOpaque()) ? C : nullptr;
1874 }
1875
1876 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       // Try to fold binary operator BO, one of whose operands is a one-use
       // select of constants, into a select of the folded constants. Returns the
       // new select, or a null SDValue when the pattern does not match.
1877   auto BinOpcode = BO->getOpcode();
1878   assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1879           BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1880           BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1881           BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1882           BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1883           BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1884           BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1885           BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1886           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1887          "Unexpected binary operator");
1888
1889   // Don't do this unless the old select is going away. We want to eliminate the
1890   // binary operator, not replace a binop with a select.
1891   // TODO: Handle ISD::SELECT_CC.
       // Operand 0 is preferred; operand 1 is only examined if operand 0 is not a
       // one-use select. SelOpNo records which operand the select is.
1892   unsigned SelOpNo = 0;
1893   SDValue Sel = BO->getOperand(0);
1894   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1895     SelOpNo = 1;
1896     Sel = BO->getOperand(1);
1897   }
1898
1899   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1900     return SDValue();
1901
       // Both arms of the select must be integer or FP constants (or constant
       // vectors).
1902   SDValue CT = Sel.getOperand(1);
1903   if (!isConstantOrConstantVector(CT, true) &&
1904       !isConstantFPBuildVectorOrConstantFP(CT))
1905     return SDValue();
1906
1907   SDValue CF = Sel.getOperand(2);
1908   if (!isConstantOrConstantVector(CF, true) &&
1909       !isConstantFPBuildVectorOrConstantFP(CF))
1910     return SDValue();
1911
1912   // Bail out if any constants are opaque because we can't constant fold those.
1913   // The exception is "and" and "or" with either 0 or -1 in which case we can
1914   // propagate non constant operands into select. I.e.:
1915   // and (select Cond, 0, -1), X --> select Cond, 0, X
1916   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1917   bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1918                          (isNullConstantOrNullSplatConstant(CT) ||
1919                           isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
1920                          (isNullConstantOrNullSplatConstant(CF) ||
1921                           isAllOnesConstantOrAllOnesSplatConstant(CF));
1922
       // CBO is the operand of the binop that is not the select.
1923   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1924   if (!CanFoldNonConst &&
1925       !isConstantOrConstantVector(CBO, true) &&
1926       !isConstantFPBuildVectorOrConstantFP(CBO))
1927     return SDValue();
1928
1929   EVT VT = Sel.getValueType();
1930
1931   // In case of shift value and shift amount may have different VT. For instance
1932   // on x86 shift amount is i8 regardless of LHS type. Bail out if we have
1933   // swapped operands and value types do not match. NB: x86 is fine if operands
1934   // are not swapped with shift amount VT being not bigger than shifted value.
1935   // TODO: that is possible to check for a shift operation, correct VTs and
1936   // still perform optimization on x86 if needed.
1937   if (SelOpNo && VT != CBO.getValueType())
1938     return SDValue();
1939
1940   // We have a select-of-constants followed by a binary operator with a
1941   // constant. Eliminate the binop by pulling the constant math into the select.
1942   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
       // The original operand order of the binop is preserved for each arm.
1943   SDLoc DL(Sel);
1944   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1945                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
       // Give up unless the true arm folded to a constant or undef.
1946   if (!CanFoldNonConst && !NewCT.isUndef() &&
1947       !isConstantOrConstantVector(NewCT, true) &&
1948       !isConstantFPBuildVectorOrConstantFP(NewCT))
1949     return SDValue();
1950
1951   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1952                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
       // Likewise for the false arm.
1953   if (!CanFoldNonConst && !NewCF.isUndef() &&
1954       !isConstantOrConstantVector(NewCF, true) &&
1955       !isConstantFPBuildVectorOrConstantFP(NewCF))
1956     return SDValue();
1957
1958   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1959 }
1960
1961 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1962   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1963          "Expecting add or sub");
1964
1965   // The math instruction needs a constant operand and a zext operand. The
1966   // constant is the RHS of an add and the LHS of a sub:
1967   //   add Z, C
1968   //   sub C, Z
1969   const bool IsAdd = N->getOpcode() == ISD::ADD;
1970   SDValue C = N->getOperand(IsAdd ? 1 : 0);
1971   SDValue Z = N->getOperand(IsAdd ? 0 : 1);
1972   auto *CN = dyn_cast<ConstantSDNode>(C);
1973   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1974     return SDValue();
1975
1976   // The zext source must be a setcc producing an i1 ...
1977   SDValue SetCC = Z.getOperand(0);
1978   if (SetCC.getOpcode() != ISD::SETCC || SetCC.getValueType() != MVT::i1)
1979     return SDValue();
1980   // ... that tests a masked low bit against zero: setcc (X & 1), 0, eq.
1981   SDValue LowBitAnd = SetCC.getOperand(0);
1982   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
1983   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1984       LowBitAnd.getOpcode() != ISD::AND ||
1985       !isOneConstant(LowBitAnd.getOperand(1)))
1986     return SDValue();
1987
1988   // We are adding/subtracting a constant and an inverted low bit. Turn that
1989   // into a subtract/add of the low bit with incremented/decremented constant:
1990   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1991   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1992   EVT VT = C.getValueType();
1993   SDLoc DL(N);
1994   SDValue LowBit = DAG.getZExtOrTrunc(LowBitAnd, DL, VT);
1995   APInt NewC = IsAdd ? CN->getAPIntValue() + 1 : CN->getAPIntValue() - 1;
1996   SDValue C1 = DAG.getConstant(NewC, DL, VT);
1997   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1998 }
1999
2000 /// Try to fold add/sub of a constant with a 'not' whose sign bit was shifted
2001 /// down to bit 0 into a shift of the original value plus an adjusted constant.
2002 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2003   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2004          "Expecting add or sub");
2005
2006   // Expected shapes: add (srl), C or sub C, (srl), with C a constant (splat).
2007   const bool IsAdd = N->getOpcode() == ISD::ADD;
2008   SDValue ShiftOp = N->getOperand(IsAdd ? 0 : 1);
2009   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(IsAdd ? 1 : 0));
2010   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2011     return SDValue();
2012
2013   // The value being shifted has to be a single-use 'not'.
2014   // TODO: Use isBitwiseNot() if it works with vectors.
2015   SDValue Not = ShiftOp.getOperand(0);
2016   const bool IsOneUseNot =
2017       Not.hasOneUse() && Not.getOpcode() == ISD::XOR &&
2018       isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1));
2019   if (!IsOneUseNot)
2020     return SDValue();
2021
2022   // The shift amount has to move the sign bit into the least-significant bit.
2023   EVT VT = ShiftOp.getValueType();
2024   SDValue ShAmt = ShiftOp.getOperand(1);
2025   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2026   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2027     return SDValue();
2028
2029   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2030   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2031   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2032   SDLoc DL(N);
2033   SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2034                                  Not.getOperand(0), ShAmt);
2035   const APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2036   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2037 }
2038
2039 SDValue DAGCombiner::visitADD(SDNode *N) {
       // Combine an ISD::ADD node. The folds below are tried in the order
       // written; each one either returns a replacement value or falls through
       // to the next. A null SDValue is returned when no fold applies.
2040   SDValue N0 = N->getOperand(0);
2041   SDValue N1 = N->getOperand(1);
2042   EVT VT = N0.getValueType();
2043   SDLoc DL(N);
2044
2045   // fold vector ops
2046   if (VT.isVector()) {
2047     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2048       return FoldedVOp;
2049
2050     // fold (add x, 0) -> x, vector edition
2051     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2052       return N0;
2053     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2054       return N1;
2055   }
2056
2057   // fold (add x, undef) -> undef
2058   if (N0.isUndef())
2059     return N0;
2060
2061   if (N1.isUndef())
2062     return N1;
2063
2064   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2065     // canonicalize constant to RHS
2066     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2067       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2068     // fold (add c1, c2) -> c1+c2
2069     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2070                                       N1.getNode());
2071   }
2072
2073   // fold (add x, 0) -> x
2074   if (isNullConstant(N1))
2075     return N0;
2076
       // Folds that require a non-opaque constant (or constant vector) RHS.
2077   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2078     // fold ((c1-A)+c2) -> (c1+c2)-A
2079     if (N0.getOpcode() == ISD::SUB &&
2080         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2081       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2082       return DAG.getNode(ISD::SUB, DL, VT,
2083                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2084                          N0.getOperand(1));
2085     }
2086
2087     // add (sext i1 X), 1 -> zext (not i1 X)
2088     // We don't transform this pattern:
2089     //   add (zext i1 X), -1 -> sext (not i1 X)
2090     // because most (?) targets generate better code for the zext form.
2091     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2092         isOneConstantOrOneSplatConstant(N1)) {
2093       SDValue X = N0.getOperand(0);
           // Once operations must be legal, both the XOR (for the 'not') and the
           // ZERO_EXTEND have to be legal; X must have 1-bit scalar elements.
2094       if ((!LegalOperations ||
2095            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2096             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2097           X.getScalarValueSizeInBits() == 1) {
2098         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2099         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2100       }
2101     }
2102
2103     // Undo the add -> or combine to merge constant offsets from a frame index.
2104     if (N0.getOpcode() == ISD::OR &&
2105         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2106         isa<ConstantSDNode>(N0.getOperand(1)) &&
2107         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2108       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2109       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2110     }
2111   }
2112
       // Pull the add into a one-use select of constants, if possible.
2113   if (SDValue NewSel = foldBinOpIntoSelect(N))
2114     return NewSel;
2115
2116   // reassociate add
2117   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2118     return RADD;
2119
2120   // fold ((0-A) + B) -> B-A
2121   if (N0.getOpcode() == ISD::SUB &&
2122       isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2123     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2124
2125   // fold (A + (0-B)) -> A-B
2126   if (N1.getOpcode() == ISD::SUB &&
2127       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2128     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2129
2130   // fold (A+(B-A)) -> B
2131   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2132     return N1.getOperand(0);
2133
2134   // fold ((B-A)+A) -> B
2135   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2136     return N0.getOperand(0);
2137
2138   // fold (A+(B-(A+C))) to (B-C)
2139   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2140       N0 == N1.getOperand(1).getOperand(0))
2141     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2142                        N1.getOperand(1).getOperand(1));
2143
2144   // fold (A+(B-(C+A))) to (B-C)
2145   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2146       N0 == N1.getOperand(1).getOperand(1))
2147     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2148                        N1.getOperand(1).getOperand(0));
2149
2150   // fold (A+((B-A)+or-C)) to (B+or-C)
2151   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2152       N1.getOperand(0).getOpcode() == ISD::SUB &&
2153       N0 == N1.getOperand(0).getOperand(1))
2154     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2155                        N1.getOperand(1));
2156
2157   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2158   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2159     SDValue N00 = N0.getOperand(0);
2160     SDValue N01 = N0.getOperand(1);
2161     SDValue N10 = N1.getOperand(0);
2162     SDValue N11 = N1.getOperand(1);
2163
2164     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2165       return DAG.getNode(ISD::SUB, DL, VT,
2166                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2167                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2168   }
2169
       // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2170   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2171     return V;
2172
       // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2173   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2174     return V;
2175
       // NOTE(review): returning N itself presumably signals that N was already
       // updated in place by SimplifyDemandedBits -- confirm against its
       // definition.
2176   if (SimplifyDemandedBits(SDValue(N, 0)))
2177     return SDValue(N, 0);
2178
2179   // fold (a+b) -> (a|b) iff a and b share no bits.
2180   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2181       DAG.haveNoCommonBitsSet(N0, N1))
2182     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2183
2184   // fold (add (xor a, -1), 1) -> (sub 0, a)
2185   if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
2186     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2187                        N0.getOperand(0));
2188
       // Try the remaining add-like folds with both operand orders.
2189   if (SDValue Combined = visitADDLike(N0, N1, N))
2190     return Combined;
2191
2192   if (SDValue Combined = visitADDLike(N1, N0, N))
2193     return Combined;
2194
2195   return SDValue();
2196 }
2197
2198 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2199   // Look through the TRUNCATE/ZERO_EXTEND/(AND x, 1) wrappers that appear due
2200   // to legalization, remembering whether an 'and 1' mask was seen on the way.
2201   bool SawMask = false;
2202   for (;;) {
2203     const unsigned Opc = V.getOpcode();
2204     const bool IsResize = Opc == ISD::TRUNCATE || Opc == ISD::ZERO_EXTEND;
2205     const bool IsMask = Opc == ISD::AND && isOneConstant(V.getOperand(1));
2206     if (!IsResize && !IsMask)
2207       break;
2208     SawMask |= IsMask;
2209     V = V.getOperand(0);
2210   }
2211
2212   // The carry is result number 1; any other result is not a carry.
2213   if (V.getResNo() != 1)
2214     return SDValue();
2215
2216   switch (V.getOpcode()) {
2217   case ISD::ADDCARRY:
2218   case ISD::SUBCARRY:
2219   case ISD::UADDO:
2220   case ISD::USUBO:
2221     break;
2222   default:
2223     return SDValue();
2224   }
2225
2226   // A masked result is fine no matter what kind of bool it is. An unmasked
2227   // one is only usable if the target's booleans are either 0 or 1 and not
2228   // other values.
2229   if (!SawMask && TLI.getBooleanContents(V.getValueType()) !=
2230                       TargetLoweringBase::ZeroOrOneBooleanContent)
2231     return SDValue();
2232
2233   return V;
2234 }
2235
2236 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2237   EVT VT = N0.getValueType();
2238   SDLoc DL(LocReference);
2239
2240   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2241   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2242       isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2243     return DAG.getNode(ISD::SUB, DL, VT, N0,
2244                        DAG.getNode(ISD::SHL, DL, VT,
2245                                    N1.getOperand(0).getOperand(1),
2246                                    N1.getOperand(1)));
2247
2248   if (N1.getOpcode() == ISD::AND) {
2249     SDValue AndOp0 = N1.getOperand(0);
2250     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2251     unsigned DestBits = VT.getScalarSizeInBits();
2252
2253     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2254     // and similar xforms where the inner op is either ~0 or 0.
2255     if (NumSignBits == DestBits &&
2256         isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2257       return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2258   }
2259
2260   // add (sext i1), X -> sub X, (zext i1)
2261   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2262       N0.getOperand(0).getValueType() == MVT::i1 &&
2263       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2264     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2265     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2266   }
2267
2268   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2269   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2270     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2271     if (TN->getVT() == MVT::i1) {
2272       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2273                                  DAG.getConstant(1, DL, VT));
2274       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2275     }
2276   }
2277
2278   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2279   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2280       N1.getResNo() == 0)
2281     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2282                        N0, N1.getOperand(0), N1.getOperand(2));
2283
2284   // (add X, Carry) -> (addcarry X, 0, Carry)
2285   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2286     if (SDValue Carry = getAsCarry(TLI, N1))
2287       return DAG.getNode(ISD::ADDCARRY, DL,
2288                          DAG.getVTList(VT, Carry.getValueType()), N0,
2289                          DAG.getConstant(0, DL, VT), Carry);
2290
2291   return SDValue();
2292 }
2293
2294 SDValue DAGCombiner::visitADDC(SDNode *N) {
2295   SDValue N0 = N->getOperand(0);
2296   SDValue N1 = N->getOperand(1);
2297   EVT VT = N0.getValueType();
2298   SDLoc DL(N);
2299
2300   // If the flag result is dead, turn this into an ADD.
2301   if (!N->hasAnyUseOfValue(1))
2302     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2303                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2304
2305   // canonicalize constant to RHS.
2306   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2307   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2308   if (N0C && !N1C)
2309     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2310
2311   // fold (addc x, 0) -> x + no carry out
2312   if (isNullConstant(N1))
2313     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2314                                         DL, MVT::Glue));
2315
2316   // If it cannot overflow, transform into an add.
2317   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2318     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2319                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2320
2321   return SDValue();
2322 }
2323
2324 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2325                            SelectionDAG &DAG, const TargetLowering &TLI) {
2326   SDValue Cst;
2327   switch (TLI.getBooleanContents(VT)) {
2328   case TargetLowering::ZeroOrOneBooleanContent:
2329   case TargetLowering::UndefinedBooleanContent:
2330     Cst = DAG.getConstant(1, DL, VT);
2331     break;
2332   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2333     Cst = DAG.getConstant(-1, DL, VT);
2334     break;
2335   }
2336
2337   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2338 }
2339
2340 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2341   if (V.getOpcode() != ISD::XOR) return false;
2342   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2343   if (!Const) return false;
2344
2345   switch(TLI.getBooleanContents(VT)) {
2346     case TargetLowering::ZeroOrOneBooleanContent:
2347       return Const->isOne();
2348     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2349       return Const->isAllOnesValue();
2350     case TargetLowering::UndefinedBooleanContent:
2351       return (Const->getAPIntValue() & 0x01) == 1;
2352   }
2353   llvm_unreachable("Unsupported boolean content");
2354 }
2355
2356 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2357   SDValue N0 = N->getOperand(0);
2358   SDValue N1 = N->getOperand(1);
2359   EVT VT = N0.getValueType();
2360   if (VT.isVector())
2361     return SDValue();
2362
2363   EVT CarryVT = N->getValueType(1);
2364   SDLoc DL(N);
2365
2366   // If the flag result is dead, turn this into an ADD.
2367   if (!N->hasAnyUseOfValue(1))
2368     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2369                      DAG.getUNDEF(CarryVT));
2370
2371   // canonicalize constant to RHS.
2372   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2373   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2374   if (N0C && !N1C)
2375     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2376
2377   // fold (uaddo x, 0) -> x + no carry out
2378   if (isNullConstant(N1))
2379     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2380
2381   // If it cannot overflow, transform into an add.
2382   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2383     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2384                      DAG.getConstant(0, DL, CarryVT));
2385
2386   // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2387   if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
2388     SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2389                               DAG.getConstant(0, DL, VT),
2390                               N0.getOperand(0));
2391     return CombineTo(N, Sub,
2392                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2393   }
2394
2395   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2396     return Combined;
2397
2398   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2399     return Combined;
2400
2401   return SDValue();
2402 }
2403
2404 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2405   auto VT = N0.getValueType();
2406
2407   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2408   // If Y + 1 cannot overflow.
2409   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2410     SDValue Y = N1.getOperand(0);
2411     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2412     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2413       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2414                          N1.getOperand(2));
2415   }
2416
2417   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2418   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2419     if (SDValue Carry = getAsCarry(TLI, N1))
2420       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2421                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2422
2423   return SDValue();
2424 }
2425
2426 SDValue DAGCombiner::visitADDE(SDNode *N) {
2427   SDValue N0 = N->getOperand(0);
2428   SDValue N1 = N->getOperand(1);
2429   SDValue CarryIn = N->getOperand(2);
2430
2431   // canonicalize constant to RHS
2432   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2433   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2434   if (N0C && !N1C)
2435     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2436                        N1, N0, CarryIn);
2437
2438   // fold (adde x, y, false) -> (addc x, y)
2439   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2440     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2441
2442   return SDValue();
2443 }
2444
2445 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2446   SDValue N0 = N->getOperand(0);
2447   SDValue N1 = N->getOperand(1);
2448   SDValue CarryIn = N->getOperand(2);
2449   SDLoc DL(N);
2450
2451   // canonicalize constant to RHS
2452   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2453   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2454   if (N0C && !N1C)
2455     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2456
2457   // fold (addcarry x, y, false) -> (uaddo x, y)
2458   if (isNullConstant(CarryIn)) {
2459     if (!LegalOperations ||
2460         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2461       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2462   }
2463
2464   EVT CarryVT = CarryIn.getValueType();
2465
2466   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2467   if (isNullConstant(N0) && isNullConstant(N1)) {
2468     EVT VT = N0.getValueType();
2469     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2470     AddToWorklist(CarryExt.getNode());
2471     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2472                                     DAG.getConstant(1, DL, VT)),
2473                      DAG.getConstant(0, DL, CarryVT));
2474   }
2475
2476   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2477   if (isBitwiseNot(N0) && isNullConstant(N1) &&
2478       isBooleanFlip(CarryIn, CarryVT, TLI)) {
2479     SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2480                               DAG.getConstant(0, DL, N0.getValueType()),
2481                               N0.getOperand(0), CarryIn.getOperand(0));
2482     return CombineTo(N, Sub,
2483                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2484   }
2485
2486   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2487     return Combined;
2488
2489   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2490     return Combined;
2491
2492   return SDValue();
2493 }
2494
2495 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2496                                        SDNode *N) {
2497   // Iff the flag result is dead:
2498   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2499   if ((N0.getOpcode() == ISD::ADD ||
2500        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2501       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2502     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2503                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2504
2505   /**
2506    * When one of the addcarry argument is itself a carry, we may be facing
2507    * a diamond carry propagation. In which case we try to transform the DAG
2508    * to ensure linear carry propagation if that is possible.
2509    *
2510    * We are trying to get:
2511    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2512    */
2513   if (auto Y = getAsCarry(TLI, N1)) {
2514     /**
2515      *            (uaddo A, B)
2516      *             /       \
2517      *          Carry      Sum
2518      *            |          \
2519      *            | (addcarry *, 0, Z)
2520      *            |       /
2521      *             \   Carry
2522      *              |   /
2523      * (addcarry X, *, *)
2524      */
2525     if (Y.getOpcode() == ISD::UADDO &&
2526         CarryIn.getResNo() == 1 &&
2527         CarryIn.getOpcode() == ISD::ADDCARRY &&
2528         isNullConstant(CarryIn.getOperand(1)) &&
2529         CarryIn.getOperand(0) == Y.getValue(0)) {
2530       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2531                               Y.getOperand(0), Y.getOperand(1),
2532                               CarryIn.getOperand(2));
2533       AddToWorklist(NewY.getNode());
2534       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2535                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2536                          NewY.getValue(1));
2537     }
2538   }
2539
2540   return SDValue();
2541 }
2542
2543 // Since it may not be valid to emit a fold to zero for vector initializers
2544 // check if we can before folding.
2545 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2546                              SelectionDAG &DAG, bool LegalOperations,
2547                              bool LegalTypes) {
2548   if (!VT.isVector())
2549     return DAG.getConstant(0, DL, VT);
2550   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2551     return DAG.getConstant(0, DL, VT);
2552   return SDValue();
2553 }
2554
2555 SDValue DAGCombiner::visitSUB(SDNode *N) {
2556   SDValue N0 = N->getOperand(0);
2557   SDValue N1 = N->getOperand(1);
2558   EVT VT = N0.getValueType();
2559   SDLoc DL(N);
2560
2561   // fold vector ops
2562   if (VT.isVector()) {
2563     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2564       return FoldedVOp;
2565
2566     // fold (sub x, 0) -> x, vector edition
2567     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2568       return N0;
2569   }
2570
2571   // fold (sub x, x) -> 0
2572   // FIXME: Refactor this and xor and other similar operations together.
2573   if (N0 == N1)
2574     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2575   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2576       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2577     // fold (sub c1, c2) -> c1-c2
2578     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2579                                       N1.getNode());
2580   }
2581
2582   if (SDValue NewSel = foldBinOpIntoSelect(N))
2583     return NewSel;
2584
2585   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2586
2587   // fold (sub x, c) -> (add x, -c)
2588   if (N1C) {
2589     return DAG.getNode(ISD::ADD, DL, VT, N0,
2590                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2591   }
2592
2593   if (isNullConstantOrNullSplatConstant(N0)) {
2594     unsigned BitWidth = VT.getScalarSizeInBits();
2595     // Right-shifting everything out but the sign bit followed by negation is
2596     // the same as flipping arithmetic/logical shift type without the negation:
2597     // -(X >>u 31) -> (X >>s 31)
2598     // -(X >>s 31) -> (X >>u 31)
2599     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2600       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2601       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2602         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2603         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2604           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2605       }
2606     }
2607
2608     // 0 - X --> 0 if the sub is NUW.
2609     if (N->getFlags().hasNoUnsignedWrap())
2610       return N0;
2611
2612     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2613       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2614       // N1 must be 0 because negating the minimum signed value is undefined.
2615       if (N->getFlags().hasNoSignedWrap())
2616         return N0;
2617
2618       // 0 - X --> X if X is 0 or the minimum signed value.
2619       return N1;
2620     }
2621   }
2622
2623   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2624   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2625     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2626
2627   // fold (A - (0-B)) -> A+B
2628   if (N1.getOpcode() == ISD::SUB &&
2629       isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2630     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2631
2632   // fold A-(A-B) -> B
2633   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2634     return N1.getOperand(1);
2635
2636   // fold (A+B)-A -> B
2637   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2638     return N0.getOperand(1);
2639
2640   // fold (A+B)-B -> A
2641   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2642     return N0.getOperand(0);
2643
2644   // fold C2-(A+C1) -> (C2-C1)-A
2645   if (N1.getOpcode() == ISD::ADD) {
2646     SDValue N11 = N1.getOperand(1);
2647     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2648         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2649       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2650       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2651     }
2652   }
2653
2654   // fold ((A+(B+or-C))-B) -> A+or-C
2655   if (N0.getOpcode() == ISD::ADD &&
2656       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2657        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2658       N0.getOperand(1).getOperand(0) == N1)
2659     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2660                        N0.getOperand(1).getOperand(1));
2661
2662   // fold ((A+(C+B))-B) -> A+C
2663   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2664       N0.getOperand(1).getOperand(1) == N1)
2665     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2666                        N0.getOperand(1).getOperand(0));
2667
2668   // fold ((A-(B-C))-C) -> A-B
2669   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2670       N0.getOperand(1).getOperand(1) == N1)
2671     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2672                        N0.getOperand(1).getOperand(0));
2673
2674   // fold (A-(B-C)) -> A+(C-B)
2675   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2676     return DAG.getNode(ISD::ADD, DL, VT, N0,
2677                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2678                                    N1.getOperand(0)));
2679
2680   // fold (X - (-Y * Z)) -> (X + (Y * Z))
2681   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2682     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2683         isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
2684       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2685                                 N1.getOperand(0).getOperand(1),
2686                                 N1.getOperand(1));
2687       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2688     }
2689     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2690         isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
2691       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2692                                 N1.getOperand(0),
2693                                 N1.getOperand(1).getOperand(1));
2694       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2695     }
2696   }
2697
2698   // If either operand of a sub is undef, the result is undef
2699   if (N0.isUndef())
2700     return N0;
2701   if (N1.isUndef())
2702     return N1;
2703
2704   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2705     return V;
2706
2707   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2708     return V;
2709
2710   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2711   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2712     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2713       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2714       SDValue S0 = N1.getOperand(0);
2715       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2716         unsigned OpSizeInBits = VT.getScalarSizeInBits();
2717         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2718           if (C->getAPIntValue() == (OpSizeInBits - 1))
2719             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2720       }
2721     }
2722   }
2723
2724   // If the relocation model supports it, consider symbol offsets.
2725   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2726     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2727       // fold (sub Sym, c) -> Sym-c
2728       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2729         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2730                                     GA->getOffset() -
2731                                         (uint64_t)N1C->getSExtValue());
2732       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2733       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2734         if (GA->getGlobal() == GB->getGlobal())
2735           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2736                                  DL, VT);
2737     }
2738
2739   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2740   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2741     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2742     if (TN->getVT() == MVT::i1) {
2743       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2744                                  DAG.getConstant(1, DL, VT));
2745       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2746     }
2747   }
2748
2749   // Prefer an add for more folding potential and possibly better codegen:
2750   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2751   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2752     SDValue ShAmt = N1.getOperand(1);
2753     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2754     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2755       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2756       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2757     }
2758   }
2759
2760   return SDValue();
2761 }
2762
2763 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2764   SDValue N0 = N->getOperand(0);
2765   SDValue N1 = N->getOperand(1);
2766   EVT VT = N0.getValueType();
2767   SDLoc DL(N);
2768
2769   // If the flag result is dead, turn this into an SUB.
2770   if (!N->hasAnyUseOfValue(1))
2771     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2772                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2773
2774   // fold (subc x, x) -> 0 + no borrow
2775   if (N0 == N1)
2776     return CombineTo(N, DAG.getConstant(0, DL, VT),
2777                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2778
2779   // fold (subc x, 0) -> x + no borrow
2780   if (isNullConstant(N1))
2781     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2782
2783   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2784   if (isAllOnesConstant(N0))
2785     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2786                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2787
2788   return SDValue();
2789 }
2790
2791 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2792   SDValue N0 = N->getOperand(0);
2793   SDValue N1 = N->getOperand(1);
2794   EVT VT = N0.getValueType();
2795   if (VT.isVector())
2796     return SDValue();
2797
2798   EVT CarryVT = N->getValueType(1);
2799   SDLoc DL(N);
2800
2801   // If the flag result is dead, turn this into an SUB.
2802   if (!N->hasAnyUseOfValue(1))
2803     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2804                      DAG.getUNDEF(CarryVT));
2805
2806   // fold (usubo x, x) -> 0 + no borrow
2807   if (N0 == N1)
2808     return CombineTo(N, DAG.getConstant(0, DL, VT),
2809                      DAG.getConstant(0, DL, CarryVT));
2810
2811   // fold (usubo x, 0) -> x + no borrow
2812   if (isNullConstant(N1))
2813     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2814
2815   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2816   if (isAllOnesConstant(N0))
2817     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2818                      DAG.getConstant(0, DL, CarryVT));
2819
2820   return SDValue();
2821 }
2822
2823 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2824   SDValue N0 = N->getOperand(0);
2825   SDValue N1 = N->getOperand(1);
2826   SDValue CarryIn = N->getOperand(2);
2827
2828   // fold (sube x, y, false) -> (subc x, y)
2829   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2830     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2831
2832   return SDValue();
2833 }
2834
2835 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2836   SDValue N0 = N->getOperand(0);
2837   SDValue N1 = N->getOperand(1);
2838   SDValue CarryIn = N->getOperand(2);
2839
2840   // fold (subcarry x, y, false) -> (usubo x, y)
2841   if (isNullConstant(CarryIn)) {
2842     if (!LegalOperations ||
2843         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2844       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2845   }
2846
2847   return SDValue();
2848 }
2849
2850 SDValue DAGCombiner::visitMUL(SDNode *N) {
2851   SDValue N0 = N->getOperand(0);
2852   SDValue N1 = N->getOperand(1);
2853   EVT VT = N0.getValueType();
2854
2855   // fold (mul x, undef) -> 0
2856   if (N0.isUndef() || N1.isUndef())
2857     return DAG.getConstant(0, SDLoc(N), VT);
2858
2859   bool N0IsConst = false;
2860   bool N1IsConst = false;
2861   bool N1IsOpaqueConst = false;
2862   bool N0IsOpaqueConst = false;
2863   APInt ConstValue0, ConstValue1;
2864   // fold vector ops
2865   if (VT.isVector()) {
2866     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2867       return FoldedVOp;
2868
2869     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2870     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2871     assert((!N0IsConst ||
2872             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2873            "Splat APInt should be element width");
2874     assert((!N1IsConst ||
2875             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2876            "Splat APInt should be element width");
2877   } else {
2878     N0IsConst = isa<ConstantSDNode>(N0);
2879     if (N0IsConst) {
2880       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2881       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2882     }
2883     N1IsConst = isa<ConstantSDNode>(N1);
2884     if (N1IsConst) {
2885       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2886       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2887     }
2888   }
2889
2890   // fold (mul c1, c2) -> c1*c2
2891   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2892     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2893                                       N0.getNode(), N1.getNode());
2894
2895   // canonicalize constant to RHS (vector doesn't have to splat)
2896   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2897      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2898     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2899   // fold (mul x, 0) -> 0
2900   if (N1IsConst && ConstValue1.isNullValue())
2901     return N1;
2902   // fold (mul x, 1) -> x
2903   if (N1IsConst && ConstValue1.isOneValue())
2904     return N0;
2905
2906   if (SDValue NewSel = foldBinOpIntoSelect(N))
2907     return NewSel;
2908
2909   // fold (mul x, -1) -> 0-x
2910   if (N1IsConst && ConstValue1.isAllOnesValue()) {
2911     SDLoc DL(N);
2912     return DAG.getNode(ISD::SUB, DL, VT,
2913                        DAG.getConstant(0, DL, VT), N0);
2914   }
2915   // fold (mul x, (1 << c)) -> x << c
2916   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2917       DAG.isKnownToBeAPowerOfTwo(N1) &&
2918       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2919     SDLoc DL(N);
2920     SDValue LogBase2 = BuildLogBase2(N1, DL);
2921     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2922     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2923     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2924   }
2925   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2926   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2927     unsigned Log2Val = (-ConstValue1).logBase2();
2928     SDLoc DL(N);
2929     // FIXME: If the input is something that is easily negated (e.g. a
2930     // single-use add), we should put the negate there.
2931     return DAG.getNode(ISD::SUB, DL, VT,
2932                        DAG.getConstant(0, DL, VT),
2933                        DAG.getNode(ISD::SHL, DL, VT, N0,
2934                             DAG.getConstant(Log2Val, DL,
2935                                       getShiftAmountTy(N0.getValueType()))));
2936   }
2937
2938   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2939   if (N0.getOpcode() == ISD::SHL &&
2940       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2941       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2942     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2943     if (isConstantOrConstantVector(C3))
2944       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2945   }
2946
2947   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2948   // use.
2949   {
2950     SDValue Sh(nullptr, 0), Y(nullptr, 0);
2951
2952     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
2953     if (N0.getOpcode() == ISD::SHL &&
2954         isConstantOrConstantVector(N0.getOperand(1)) &&
2955         N0.getNode()->hasOneUse()) {
2956       Sh = N0; Y = N1;
2957     } else if (N1.getOpcode() == ISD::SHL &&
2958                isConstantOrConstantVector(N1.getOperand(1)) &&
2959                N1.getNode()->hasOneUse()) {
2960       Sh = N1; Y = N0;
2961     }
2962
2963     if (Sh.getNode()) {
2964       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2965       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2966     }
2967   }
2968
2969   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2970   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2971       N0.getOpcode() == ISD::ADD &&
2972       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2973       isMulAddWithConstProfitable(N, N0, N1))
2974       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2975                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2976                                      N0.getOperand(0), N1),
2977                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2978                                      N0.getOperand(1), N1));
2979
2980   // reassociate mul
2981   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
2982     return RMUL;
2983
2984   return SDValue();
2985 }
2986
2987 /// Return true if divmod libcall is available.
2988 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2989                                      const TargetLowering &TLI) {
2990   RTLIB::Libcall LC;
2991   EVT NodeType = Node->getValueType(0);
2992   if (!NodeType.isSimple())
2993     return false;
2994   switch (NodeType.getSimpleVT().SimpleTy) {
2995   default: return false; // No libcall for vector types.
2996   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
2997   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2998   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2999   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3000   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3001   }
3002
3003   return TLI.getLibcallName(LC) != nullptr;
3004 }
3005
3006 /// Issue divrem if both quotient and remainder are needed.
3007 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3008   if (Node->use_empty())
3009     return SDValue(); // This is a dead node, leave it alone.
3010
3011   unsigned Opcode = Node->getOpcode();
3012   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3013   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3014
3015   // DivMod lib calls can still work on non-legal types if using lib-calls.
3016   EVT VT = Node->getValueType(0);
3017   if (VT.isVector() || !VT.isInteger())
3018     return SDValue();
3019
3020   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3021     return SDValue();
3022
3023   // If DIVREM is going to get expanded into a libcall,
3024   // but there is no libcall available, then don't combine.
3025   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3026       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3027     return SDValue();
3028
3029   // If div is legal, it's better to do the normal expansion
3030   unsigned OtherOpcode = 0;
3031   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3032     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3033     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3034       return SDValue();
3035   } else {
3036     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3037     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3038       return SDValue();
3039   }
3040
3041   SDValue Op0 = Node->getOperand(0);
3042   SDValue Op1 = Node->getOperand(1);
3043   SDValue combined;
3044   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3045          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3046     SDNode *User = *UI;
3047     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3048         User->use_empty())
3049       continue;
3050     // Convert the other matching node(s), too;
3051     // otherwise, the DIVREM may get target-legalized into something
3052     // target-specific that we won't be able to recognize.
3053     unsigned UserOpc = User->getOpcode();
3054     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3055         User->getOperand(0) == Op0 &&
3056         User->getOperand(1) == Op1) {
3057       if (!combined) {
3058         if (UserOpc == OtherOpcode) {
3059           SDVTList VTs = DAG.getVTList(VT, VT);
3060           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3061         } else if (UserOpc == DivRemOpc) {
3062           combined = SDValue(User, 0);
3063         } else {
3064           assert(UserOpc == Opcode);
3065           continue;
3066         }
3067       }
3068       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3069         CombineTo(User, combined);
3070       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3071         CombineTo(User, combined.getValue(1));
3072     }
3073   }
3074   return combined;
3075 }
3076
3077 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3078   SDValue N0 = N->getOperand(0);
3079   SDValue N1 = N->getOperand(1);
3080   EVT VT = N->getValueType(0);
3081   SDLoc DL(N);
3082
3083   // X / undef -> undef
3084   // X % undef -> undef
3085   // X / 0 -> undef
3086   // X % 0 -> undef
3087   // NOTE: This includes vectors where any divisor element is zero/undef.
3088   if (DAG.isUndef(N->getOpcode(), {N0, N1}))
3089     return DAG.getUNDEF(VT);
3090
3091   // undef / X -> 0
3092   // undef % X -> 0
3093   if (N0.isUndef())
3094     return DAG.getConstant(0, DL, VT);
3095
3096   return SDValue();
3097 }
3098
3099 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3100   SDValue N0 = N->getOperand(0);
3101   SDValue N1 = N->getOperand(1);
3102   EVT VT = N->getValueType(0);
3103   EVT CCVT = getSetCCResultType(VT);
3104
3105   // fold vector ops
3106   if (VT.isVector())
3107     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3108       return FoldedVOp;
3109
3110   SDLoc DL(N);
3111
3112   // fold (sdiv c1, c2) -> c1/c2
3113   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3114   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3115   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3116     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3117   // fold (sdiv X, 1) -> X
3118   if (N1C && N1C->isOne())
3119     return N0;
3120   // fold (sdiv X, -1) -> 0-X
3121   if (N1C && N1C->isAllOnesValue())
3122     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3123   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3124   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3125     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3126                          DAG.getConstant(1, DL, VT),
3127                          DAG.getConstant(0, DL, VT));
3128
3129   if (SDValue V = simplifyDivRem(N, DAG))
3130     return V;
3131
3132   if (SDValue NewSel = foldBinOpIntoSelect(N))
3133     return NewSel;
3134
3135   // If we know the sign bits of both operands are zero, strength reduce to a
3136   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3137   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3138     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3139
3140   if (SDValue V = visitSDIVLike(N0, N1, N))
3141     return V;
3142
3143   // sdiv, srem -> sdivrem
3144   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3145   // true.  Otherwise, we break the simplification logic in visitREM().
3146   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3147   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3148     if (SDValue DivRem = useDivRem(N))
3149         return DivRem;
3150
3151   return SDValue();
3152 }
3153
3154 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3155   SDLoc DL(N);
3156   EVT VT = N->getValueType(0);
3157   EVT CCVT = getSetCCResultType(VT);
3158   unsigned BitWidth = VT.getScalarSizeInBits();
3159
3160   // Helper for determining whether a value is a power-2 constant scalar or a
3161   // vector of such elements.
3162   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3163     if (C->isNullValue() || C->isOpaque())
3164       return false;
3165     if (C->getAPIntValue().isPowerOf2())
3166       return true;
3167     if ((-C->getAPIntValue()).isPowerOf2())
3168       return true;
3169     return false;
3170   };
3171
3172   // fold (sdiv X, pow2) -> simple ops after legalize
3173   // FIXME: We check for the exact bit here because the generic lowering gives
3174   // better results in that case. The target-specific lowering should learn how
3175   // to handle exact sdivs efficiently.
3176   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3177     // Target-specific implementation of sdiv x, pow2.
3178     if (SDValue Res = BuildSDIVPow2(N))
3179       return Res;
3180
3181     // Create constants that are functions of the shift amount value.
3182     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3183     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3184     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3185     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3186     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3187     if (!isConstantOrConstantVector(Inexact))
3188       return SDValue();
3189
3190     // Splat the sign bit into the register
3191     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3192                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3193     AddToWorklist(Sign.getNode());
3194
3195     // Add (N0 < 0) ? abs2 - 1 : 0;
3196     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3197     AddToWorklist(Srl.getNode());
3198     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3199     AddToWorklist(Add.getNode());
3200     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3201     AddToWorklist(Sra.getNode());
3202
3203     // Special case: (sdiv X, 1) -> X
3204     // Special Case: (sdiv X, -1) -> 0-X
3205     SDValue One = DAG.getConstant(1, DL, VT);
3206     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3207     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3208     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3209     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3210     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3211
3212     // If dividing by a positive value, we're done. Otherwise, the result must
3213     // be negated.
3214     SDValue Zero = DAG.getConstant(0, DL, VT);
3215     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3216
3217     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3218     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3219     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3220     return Res;
3221   }
3222
3223   // If integer divide is expensive and we satisfy the requirements, emit an
3224   // alternate sequence.  Targets may check function attributes for size/speed
3225   // trade-offs.
3226   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3227   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3228       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3229     if (SDValue Op = BuildSDIV(N))
3230       return Op;
3231
3232   return SDValue();
3233 }
3234
3235 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3236   SDValue N0 = N->getOperand(0);
3237   SDValue N1 = N->getOperand(1);
3238   EVT VT = N->getValueType(0);
3239   EVT CCVT = getSetCCResultType(VT);
3240
3241   // fold vector ops
3242   if (VT.isVector())
3243     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3244       return FoldedVOp;
3245
3246   SDLoc DL(N);
3247
3248   // fold (udiv c1, c2) -> c1/c2
3249   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3250   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3251   if (N0C && N1C)
3252     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3253                                                     N0C, N1C))
3254       return Folded;
3255   // fold (udiv X, 1) -> X
3256   if (N1C && N1C->isOne())
3257     return N0;
3258   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3259   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3260     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3261                          DAG.getConstant(1, DL, VT),
3262                          DAG.getConstant(0, DL, VT));
3263
3264   if (SDValue V = simplifyDivRem(N, DAG))
3265     return V;
3266
3267   if (SDValue NewSel = foldBinOpIntoSelect(N))
3268     return NewSel;
3269
3270   if (SDValue V = visitUDIVLike(N0, N1, N))
3271     return V;
3272
3273   // sdiv, srem -> sdivrem
3274   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3275   // true.  Otherwise, we break the simplification logic in visitREM().
3276   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3277   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3278     if (SDValue DivRem = useDivRem(N))
3279         return DivRem;
3280
3281   return SDValue();
3282 }
3283
3284 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3285   SDLoc DL(N);
3286   EVT VT = N->getValueType(0);
3287
3288   // fold (udiv x, (1 << c)) -> x >>u c
3289   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3290       DAG.isKnownToBeAPowerOfTwo(N1)) {
3291     SDValue LogBase2 = BuildLogBase2(N1, DL);
3292     AddToWorklist(LogBase2.getNode());
3293
3294     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3295     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3296     AddToWorklist(Trunc.getNode());
3297     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3298   }
3299
3300   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3301   if (N1.getOpcode() == ISD::SHL) {
3302     SDValue N10 = N1.getOperand(0);
3303     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3304         DAG.isKnownToBeAPowerOfTwo(N10)) {
3305       SDValue LogBase2 = BuildLogBase2(N10, DL);
3306       AddToWorklist(LogBase2.getNode());
3307
3308       EVT ADDVT = N1.getOperand(1).getValueType();
3309       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3310       AddToWorklist(Trunc.getNode());
3311       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3312       AddToWorklist(Add.getNode());
3313       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3314     }
3315   }
3316
3317   // fold (udiv x, c) -> alternate
3318   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3319   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3320       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3321     if (SDValue Op = BuildUDIV(N))
3322       return Op;
3323
3324   return SDValue();
3325 }
3326
3327 // handles ISD::SREM and ISD::UREM
3328 SDValue DAGCombiner::visitREM(SDNode *N) {
3329   unsigned Opcode = N->getOpcode();
3330   SDValue N0 = N->getOperand(0);
3331   SDValue N1 = N->getOperand(1);
3332   EVT VT = N->getValueType(0);
3333   EVT CCVT = getSetCCResultType(VT);
3334
3335   bool isSigned = (Opcode == ISD::SREM);
3336   SDLoc DL(N);
3337
3338   // fold (rem c1, c2) -> c1%c2
3339   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3340   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3341   if (N0C && N1C)
3342     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3343       return Folded;
3344   // fold (urem X, -1) -> select(X == -1, 0, x)
3345   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3346     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3347                          DAG.getConstant(0, DL, VT), N0);
3348
3349   if (SDValue V = simplifyDivRem(N, DAG))
3350     return V;
3351
3352   if (SDValue NewSel = foldBinOpIntoSelect(N))
3353     return NewSel;
3354
3355   if (isSigned) {
3356     // If we know the sign bits of both operands are zero, strength reduce to a
3357     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3358     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3359       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3360   } else {
3361     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3362     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3363       // fold (urem x, pow2) -> (and x, pow2-1)
3364       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3365       AddToWorklist(Add.getNode());
3366       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3367     }
3368     if (N1.getOpcode() == ISD::SHL &&
3369         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3370       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3371       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3372       AddToWorklist(Add.getNode());
3373       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3374     }
3375   }
3376
3377   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3378
3379   // If X/C can be simplified by the division-by-constant logic, lower
3380   // X%C to the equivalent of X-X/C*C.
3381   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3382   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3383   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3384   // combine will not return a DIVREM.  Regardless, checking cheapness here
3385   // makes sense since the simplification results in fatter code.
3386   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3387     SDValue OptimizedDiv =
3388         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3389     if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
3390         OptimizedDiv.getOpcode() != ISD::SDIVREM) {
3391       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3392       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3393       AddToWorklist(OptimizedDiv.getNode());
3394       AddToWorklist(Mul.getNode());
3395       return Sub;
3396     }
3397   }
3398
3399   // sdiv, srem -> sdivrem
3400   if (SDValue DivRem = useDivRem(N))
3401     return DivRem.getValue(1);
3402
3403   return SDValue();
3404 }
3405
3406 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3407   SDValue N0 = N->getOperand(0);
3408   SDValue N1 = N->getOperand(1);
3409   EVT VT = N->getValueType(0);
3410   SDLoc DL(N);
3411
3412   if (VT.isVector()) {
3413     // fold (mulhs x, 0) -> 0
3414     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3415       return N1;
3416     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3417       return N0;
3418   }
3419
3420   // fold (mulhs x, 0) -> 0
3421   if (isNullConstant(N1))
3422     return N1;
3423   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3424   if (isOneConstant(N1))
3425     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3426                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3427                                        getShiftAmountTy(N0.getValueType())));
3428
3429   // fold (mulhs x, undef) -> 0
3430   if (N0.isUndef() || N1.isUndef())
3431     return DAG.getConstant(0, DL, VT);
3432
3433   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3434   // plus a shift.
3435   if (VT.isSimple() && !VT.isVector()) {
3436     MVT Simple = VT.getSimpleVT();
3437     unsigned SimpleSize = Simple.getSizeInBits();
3438     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3439     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3440       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3441       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3442       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3443       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3444             DAG.getConstant(SimpleSize, DL,
3445                             getShiftAmountTy(N1.getValueType())));
3446       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3447     }
3448   }
3449
3450   return SDValue();
3451 }
3452
3453 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3454   SDValue N0 = N->getOperand(0);
3455   SDValue N1 = N->getOperand(1);
3456   EVT VT = N->getValueType(0);
3457   SDLoc DL(N);
3458
3459   if (VT.isVector()) {
3460     // fold (mulhu x, 0) -> 0
3461     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3462       return N1;
3463     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3464       return N0;
3465   }
3466
3467   // fold (mulhu x, 0) -> 0
3468   if (isNullConstant(N1))
3469     return N1;
3470   // fold (mulhu x, 1) -> 0
3471   if (isOneConstant(N1))
3472     return DAG.getConstant(0, DL, N0.getValueType());
3473   // fold (mulhu x, undef) -> 0
3474   if (N0.isUndef() || N1.isUndef())
3475     return DAG.getConstant(0, DL, VT);
3476
3477   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3478   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3479       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3480     SDLoc DL(N);
3481     unsigned NumEltBits = VT.getScalarSizeInBits();
3482     SDValue LogBase2 = BuildLogBase2(N1, DL);
3483     SDValue SRLAmt = DAG.getNode(
3484         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3485     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3486     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3487     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3488   }
3489
3490   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3491   // plus a shift.
3492   if (VT.isSimple() && !VT.isVector()) {
3493     MVT Simple = VT.getSimpleVT();
3494     unsigned SimpleSize = Simple.getSizeInBits();
3495     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3496     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3497       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3498       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3499       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3500       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3501             DAG.getConstant(SimpleSize, DL,
3502                             getShiftAmountTy(N1.getValueType())));
3503       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3504     }
3505   }
3506
3507   return SDValue();
3508 }
3509
3510 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3511 /// give the opcodes for the two computations that are being performed. Return
3512 /// true if a simplification was made.
3513 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3514                                                 unsigned HiOp) {
3515   // If the high half is not needed, just compute the low half.
3516   bool HiExists = N->hasAnyUseOfValue(1);
3517   if (!HiExists &&
3518       (!LegalOperations ||
3519        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3520     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3521     return CombineTo(N, Res, Res);
3522   }
3523
3524   // If the low half is not needed, just compute the high half.
3525   bool LoExists = N->hasAnyUseOfValue(0);
3526   if (!LoExists &&
3527       (!LegalOperations ||
3528        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3529     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3530     return CombineTo(N, Res, Res);
3531   }
3532
3533   // If both halves are used, return as it is.
3534   if (LoExists && HiExists)
3535     return SDValue();
3536
3537   // If the two computed results can be simplified separately, separate them.
3538   if (LoExists) {
3539     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3540     AddToWorklist(Lo.getNode());
3541     SDValue LoOpt = combine(Lo.getNode());
3542     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3543         (!LegalOperations ||
3544          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3545       return CombineTo(N, LoOpt, LoOpt);
3546   }
3547
3548   if (HiExists) {
3549     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3550     AddToWorklist(Hi.getNode());
3551     SDValue HiOpt = combine(Hi.getNode());
3552     if (HiOpt.getNode() && HiOpt != Hi &&
3553         (!LegalOperations ||
3554          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3555       return CombineTo(N, HiOpt, HiOpt);
3556   }
3557
3558   return SDValue();
3559 }
3560
3561 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3562   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3563     return Res;
3564
3565   EVT VT = N->getValueType(0);
3566   SDLoc DL(N);
3567
3568   // If the type is twice as wide is legal, transform the mulhu to a wider
3569   // multiply plus a shift.
3570   if (VT.isSimple() && !VT.isVector()) {
3571     MVT Simple = VT.getSimpleVT();
3572     unsigned SimpleSize = Simple.getSizeInBits();
3573     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3574     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3575       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3576       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3577       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3578       // Compute the high part as N1.
3579       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3580             DAG.getConstant(SimpleSize, DL,
3581                             getShiftAmountTy(Lo.getValueType())));
3582       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3583       // Compute the low part as N0.
3584       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3585       return CombineTo(N, Lo, Hi);
3586     }
3587   }
3588
3589   return SDValue();
3590 }
3591
3592 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3593   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3594     return Res;
3595
3596   EVT VT = N->getValueType(0);
3597   SDLoc DL(N);
3598
3599   // If the type is twice as wide is legal, transform the mulhu to a wider
3600   // multiply plus a shift.
3601   if (VT.isSimple() && !VT.isVector()) {
3602     MVT Simple = VT.getSimpleVT();
3603     unsigned SimpleSize = Simple.getSizeInBits();
3604     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3605     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3606       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3607       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3608       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3609       // Compute the high part as N1.
3610       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3611             DAG.getConstant(SimpleSize, DL,
3612                             getShiftAmountTy(Lo.getValueType())));
3613       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3614       // Compute the low part as N0.
3615       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3616       return CombineTo(N, Lo, Hi);
3617     }
3618   }
3619
3620   return SDValue();
3621 }
3622
3623 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3624   // (smulo x, 2) -> (saddo x, x)
3625   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3626     if (C2->getAPIntValue() == 2)
3627       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3628                          N->getOperand(0), N->getOperand(0));
3629
3630   return SDValue();
3631 }
3632
3633 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3634   // (umulo x, 2) -> (uaddo x, x)
3635   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3636     if (C2->getAPIntValue() == 2)
3637       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3638                          N->getOperand(0), N->getOperand(0));
3639
3640   return SDValue();
3641 }
3642
3643 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3644   SDValue N0 = N->getOperand(0);
3645   SDValue N1 = N->getOperand(1);
3646   EVT VT = N0.getValueType();
3647
3648   // fold vector ops
3649   if (VT.isVector())
3650     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3651       return FoldedVOp;
3652
3653   // fold operation with constant operands.
3654   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3655   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3656   if (N0C && N1C)
3657     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3658
3659   // canonicalize constant to RHS
3660   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3661      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3662     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3663
3664   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3665   // Only do this if the current op isn't legal and the flipped is.
3666   unsigned Opcode = N->getOpcode();
3667   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3668   if (!TLI.isOperationLegal(Opcode, VT) &&
3669       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3670       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3671     unsigned AltOpcode;
3672     switch (Opcode) {
3673     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3674     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3675     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3676     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3677     default: llvm_unreachable("Unknown MINMAX opcode");
3678     }
3679     if (TLI.isOperationLegal(AltOpcode, VT))
3680       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3681   }
3682
3683   return SDValue();
3684 }
3685
3686 /// If this is a binary operator with two operands of the same opcode, try to
3687 /// simplify it.
3688 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3689   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3690   EVT VT = N0.getValueType();
3691   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3692
3693   // Bail early if none of these transforms apply.
3694   if (N0.getNumOperands() == 0) return SDValue();
3695
3696   // For each of OP in AND/OR/XOR:
3697   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3698   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3699   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3700   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3701   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3702   //
3703   // do not sink logical op inside of a vector extend, since it may combine
3704   // into a vsetcc.
3705   EVT Op0VT = N0.getOperand(0).getValueType();
3706   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3707        N0.getOpcode() == ISD::SIGN_EXTEND ||
3708        N0.getOpcode() == ISD::BSWAP ||
3709        // Avoid infinite looping with PromoteIntBinOp.
3710        (N0.getOpcode() == ISD::ANY_EXTEND &&
3711         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3712        (N0.getOpcode() == ISD::TRUNCATE &&
3713         (!TLI.isZExtFree(VT, Op0VT) ||
3714          !TLI.isTruncateFree(Op0VT, VT)) &&
3715         TLI.isTypeLegal(Op0VT))) &&
3716       !VT.isVector() &&
3717       Op0VT == N1.getOperand(0).getValueType() &&
3718       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3719     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3720                                  N0.getOperand(0).getValueType(),
3721                                  N0.getOperand(0), N1.getOperand(0));
3722     AddToWorklist(ORNode.getNode());
3723     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3724   }
3725
3726   // For each of OP in SHL/SRL/SRA/AND...
3727   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3728   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
3729   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3730   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3731        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3732       N0.getOperand(1) == N1.getOperand(1)) {
3733     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3734                                  N0.getOperand(0).getValueType(),
3735                                  N0.getOperand(0), N1.getOperand(0));
3736     AddToWorklist(ORNode.getNode());
3737     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3738                        ORNode, N0.getOperand(1));
3739   }
3740
3741   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3742   // Only perform this optimization up until type legalization, before
3743   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3744   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3745   // we don't want to undo this promotion.
3746   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3747   // on scalars.
3748   if ((N0.getOpcode() == ISD::BITCAST ||
3749        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3750        Level <= AfterLegalizeTypes) {
3751     SDValue In0 = N0.getOperand(0);
3752     SDValue In1 = N1.getOperand(0);
3753     EVT In0Ty = In0.getValueType();
3754     EVT In1Ty = In1.getValueType();
3755     SDLoc DL(N);
3756     // If both incoming values are integers, and the original types are the
3757     // same.
3758     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3759       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3760       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3761       AddToWorklist(Op.getNode());
3762       return BC;
3763     }
3764   }
3765
3766   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3767   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3768   // If both shuffles use the same mask, and both shuffle within a single
3769   // vector, then it is worthwhile to move the swizzle after the operation.
3770   // The type-legalizer generates this pattern when loading illegal
3771   // vector types from memory. In many cases this allows additional shuffle
3772   // optimizations.
3773   // There are other cases where moving the shuffle after the xor/and/or
3774   // is profitable even if shuffles don't perform a swizzle.
3775   // If both shuffles use the same mask, and both shuffles have the same first
3776   // or second operand, then it might still be profitable to move the shuffle
3777   // after the xor/and/or operation.
3778   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3779     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3780     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3781
3782     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3783            "Inputs to shuffles are not the same type");
3784
3785     // Check that both shuffles use the same mask. The masks are known to be of
3786     // the same length because the result vector type is the same.
3787     // Check also that shuffles have only one use to avoid introducing extra
3788     // instructions.
3789     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3790         SVN0->getMask().equals(SVN1->getMask())) {
3791       SDValue ShOp = N0->getOperand(1);
3792
3793       // Don't try to fold this node if it requires introducing a
3794       // build vector of all zeros that might be illegal at this stage.
3795       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3796         if (!LegalTypes)
3797           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3798         else
3799           ShOp = SDValue();
3800       }
3801
3802       // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3803       // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
3804       // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3805       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3806         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3807                                       N0->getOperand(0), N1->getOperand(0));
3808         AddToWorklist(NewNode.getNode());
3809         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3810                                     SVN0->getMask());
3811       }
3812
3813       // Don't try to fold this node if it requires introducing a
3814       // build vector of all zeros that might be illegal at this stage.
3815       ShOp = N0->getOperand(0);
3816       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3817         if (!LegalTypes)
3818           ShOp = DAG.getConstant(0, SDLoc(N), VT);
3819         else
3820           ShOp = SDValue();
3821       }
3822
3823       // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3824       // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
3825       // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3826       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3827         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3828                                       N0->getOperand(1), N1->getOperand(1));
3829         AddToWorklist(NewNode.getNode());
3830         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3831                                     SVN0->getMask());
3832       }
3833     }
3834   }
3835
3836   return SDValue();
3837 }
3838
3839 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
     ///
     /// \p IsAnd is true when the two compares are combined with 'and' and false
     /// when they are combined with 'or'. \p N0 and \p N1 are the two operands of
     /// that logic op and \p DL is the location given to the replacement setcc.
     /// \returns the replacement value, or an empty SDValue when either operand is
     /// not setcc-equivalent or none of the folds below applies.
3840 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3841                                        const SDLoc &DL) {
3842   SDValue LL, LR, RL, RR, N0CC, N1CC;
3843   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3844       !isSetCCEquivalent(N1, RL, RR, N1CC))
3845     return SDValue();
3846
3847   assert(N0.getValueType() == N1.getValueType() &&
3848          "Unexpected operand types for bitwise logic op");
3849   assert(LL.getValueType() == LR.getValueType() &&
3850          RL.getValueType() == RR.getValueType() &&
3851          "Unexpected operand types for setcc");
3852
3853   // If we're here post-legalization or the logic op type is not i1, the logic
3854   // op type must match a setcc result type. Also, all folds require new
3855   // operations on the left and right operands, so those types must match.
3856   EVT VT = N0.getValueType();
3857   EVT OpVT = LL.getValueType();
3858   if (LegalOperations || VT.getScalarType() != MVT::i1)
3859     if (VT != getSetCCResultType(OpVT))
3860       return SDValue();
3861   if (OpVT != RL.getValueType())
3862     return SDValue();
3863
3864   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3865   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3866   bool IsInteger = OpVT.isInteger();
       // Both compares use the same predicate and the same RHS. When that RHS is
       // 0 or -1 (scalar or splat), the pair can be replaced by a single compare
       // of (or LL, RL) or (and LL, RL) against that same RHS.
3867   if (LR == RR && CC0 == CC1 && IsInteger) {
3868     bool IsZero = isNullConstantOrNullSplatConstant(LR);
3869     bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3870
3871     // All bits clear?
3872     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3873     // All sign bits clear?
3874     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3875     // Any bits set?
3876     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3877     // Any sign bits set?
3878     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3879
3880     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
3881     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3882     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
3883     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
3884     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3885       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3886       AddToWorklist(Or.getNode());
3887       return DAG.getSetCC(DL, VT, Or, LR, CC1);
3888     }
3889
3890     // All bits set?
3891     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3892     // All sign bits set?
3893     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3894     // Any bits clear?
3895     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3896     // Any sign bits clear?
3897     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3898
3899     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3900     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
3901     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3902     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
3903     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3904       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3905       AddToWorklist(And.getNode());
3906       return DAG.getSetCC(DL, VT, And, LR, CC1);
3907     }
3908   }
3909
3910   // TODO: What is the 'or' equivalent of this fold?
3911   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
       // Adding 1 maps X == -1 to 0 and X == 0 to 1, so the unsigned compare
       // against 2 rejects exactly those two values.
3912   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3913       IsInteger && CC0 == ISD::SETNE &&
3914       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3915        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3916     SDValue One = DAG.getConstant(1, DL, OpVT);
3917     SDValue Two = DAG.getConstant(2, DL, OpVT);
3918     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3919     AddToWorklist(Add.getNode());
3920     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3921   }
3922
3923   // Try more general transforms if the predicates match and the only user of
3924   // the compares is the 'and' or 'or'.
3925   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3926       N0.hasOneUse() && N1.hasOneUse()) {
3927     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3928     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3929     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3930       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3931       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3932       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3933       SDValue Zero = DAG.getConstant(0, DL, OpVT);
3934       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3935     }
3936   }
3937
3938   // Canonicalize equivalent operands to LL == RL.
       // The second compare's operands are exchanged, so its predicate is replaced
       // by the operand-swapped form as well.
3939   if (LL == RR && LR == RL) {
3940     CC1 = ISD::getSetCCSwappedOperands(CC1);
3941     std::swap(RL, RR);
3942   }
3943
3944   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3945   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3946   if (LL == RL && LR == RR) {
3947     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3948                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
         // Give up if no single condition code was produced (SETCC_INVALID), or if
         // we are past operation legalization and the merged condition code or
         // the setcc itself would not be legal for the target.
3949     if (NewCC != ISD::SETCC_INVALID &&
3950         (!LegalOperations ||
3951          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3952           TLI.isOperationLegal(ISD::SETCC, OpVT))))
3953       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3954   }
3955
3956   return SDValue();
3957 }
3958
3959 /// This contains all DAGCombine rules which reduce two values combined by
3960 /// an And operation to a single value. This makes them reusable in the context
3961 /// of visitSELECT(). Rules involving constants are not included as
3962 /// visitSELECT() already handles those cases.
     ///
     /// \p N0 and \p N1 are the two values being and'ed. \p N supplies the debug
     /// location for new nodes and is itself returned (as SDValue(N, 0)) after the
     /// add-immediate rewrite below, which replaces \p N0 in place via CombineTo.
     /// \returns an empty SDValue if no rule matched.
3963 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3964   EVT VT = N1.getValueType();
3965   SDLoc DL(N);
3966
3967   // fold (and x, undef) -> 0
3968   if (N0.isUndef() || N1.isUndef())
3969     return DAG.getConstant(0, DL, VT);
3970
3971   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3972     return V;
3973
3974   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3975       VT.getSizeInBits() <= 64) {
3976     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3977       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3978         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3979         // immediate for an add, but it is legal if its top c2 bits are set,
3980         // transform the ADD so the immediate doesn't need to be materialized
3981         // in a register.
3982         APInt ADDC = ADDI->getAPIntValue();
3983         APInt SRLC = SRLI->getAPIntValue();
3984         if (ADDC.getMinSignedBits() <= 64 &&
3985             SRLC.ult(VT.getSizeInBits()) &&
3986             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3987           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3988                                              SRLC.getZExtValue());
               // Only proceed if the top c2 bits of the add constant are
               // currently clear; they are then set in ADDC and the resulting
               // immediate is re-tested for legality.
3989           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3990             ADDC |= Mask;
3991             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3992               SDLoc DL0(N0);
3993               SDValue NewAdd =
3994                 DAG.getNode(ISD::ADD, DL0, VT,
3995                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3996               CombineTo(N0.getNode(), NewAdd);
3997               // Return N so it doesn't get rechecked!
3998               return SDValue(N, 0);
3999             }
4000           }
4001         }
4002       }
4003     }
4004   }
4005
4006   // Reduce bit extract of low half of an integer to the narrower type.
4007   // (and (srl i64:x, K), KMask) ->
4008   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4009   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4010     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4011       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4012         unsigned Size = VT.getSizeInBits();
4013         const APInt &AndMask = CAnd->getAPIntValue();
4014         unsigned ShiftBits = CShift->getZExtValue();
4015
4016         // Bail out, this node will probably disappear anyway.
4017         if (ShiftBits == 0)
4018           return SDValue();
4019
4020         unsigned MaskBits = AndMask.countTrailingOnes();
4021         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4022
4023         if (AndMask.isMask() &&
4024             // Required bits must not span the two halves of the integer and
4025             // must fit in the half size type.
4026             (ShiftBits + MaskBits <= Size / 2) &&
4027             TLI.isNarrowingProfitable(VT, HalfVT) &&
4028             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4029             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4030             TLI.isTruncateFree(VT, HalfVT) &&
4031             TLI.isZExtFree(HalfVT, VT)) {
4032           // The isNarrowingProfitable is to avoid regressions on PPC and
4033           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4034           // on downstream users of this. Those patterns could probably be
4035           // extended to handle extensions mixed in.
4036
               // All nodes created below take the location of the original shift.
4037           SDLoc SL(N0);
4038           assert(MaskBits <= Size);
4039
4040           // Extracting the highest bit of the low half.
4041           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4042           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4043                                       N0.getOperand(0));
4044
4045           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4046           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4047           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4048           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4049           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4050         }
4051       }
4052     }
4053   }
4054
4055   return SDValue();
4056 }
4057
     /// Determine whether an AND of the value loaded by \p LoadN with the constant
     /// \p AndC can be implemented as a zero-extending load.
     ///
     /// \p AndC must satisfy APInt::isMask(); its count of trailing ones becomes
     /// the bit width of \p ExtVT. \p ExtVT is written as soon as that mask check
     /// passes, so it is set even on some paths that return false.
     /// \p LoadResultTy is the result type used for the ZEXTLOAD legality queries.
     /// \returns true if the load's memory type already equals \p ExtVT (and the
     /// ZEXTLOAD is legal when operations must be legal), or if the load may be
     /// narrowed to \p ExtVT.
4058 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4059                                    EVT LoadResultTy, EVT &ExtVT) {
4060   if (!AndC->getAPIntValue().isMask())
4061     return false;
4062
4063   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4064
4065   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4066   EVT LoadedVT = LoadN->getMemoryVT();
4067
4068   if (ExtVT == LoadedVT &&
4069       (!LegalOperations ||
4070        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4071     // ZEXTLOAD will match without needing to change the size of the value being
4072     // loaded.
4073     return true;
4074   }
4075
4076   // Do not change the width of a volatile load.
4077   if (LoadN->isVolatile())
4078     return false;
4079
4080   // Do not generate loads of non-round integer types since these can
4081   // be expensive (and would be wrong if the type is not byte sized).
4082   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4083     return false;
4084
       // Once operations must be legal, the narrower zero-extending load has to be
       // supported by the target.
4085   if (LegalOperations &&
4086       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4087     return false;
4088
       // Finally, let the target veto the narrowing.
4089   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4090     return false;
4091
4092   return true;
4093 }
4094
     /// Check whether the load or store \p LDST may be narrowed to the memory type
     /// \p MemVT.
     ///
     /// \p ExtType is the extension kind used for the legality and target queries
     /// when \p LDST is a load. \p MemVT is taken by reference but is not modified
     /// here. \p ShAmt is a bit amount that must be a multiple of 8; it is passed
     /// to the target as ShAmt / 8 when non-zero and is added to the size of
     /// \p MemVT for the range checks.
     /// \returns false if \p LDST is null or any of the checks below fails.
4095 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4096                                     ISD::LoadExtType ExtType, EVT &MemVT,
4097                                     unsigned ShAmt) {
4098   if (!LDST)
4099     return false;
4100   // Only allow byte offsets.
4101   if (ShAmt % 8)
4102     return false;
4103
4104   // Do not generate loads of non-round integer types since these can
4105   // be expensive (and would be wrong if the type is not byte sized).
4106   if (!MemVT.isRound())
4107     return false;
4108
4109   // Don't change the width of a volatile load.
4110   if (LDST->isVolatile())
4111     return false;
4112
4113   // Verify that we are actually reducing a load width here.
4114   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4115     return false;
4116
4117   // Ensure that this isn't going to produce an unsupported unaligned access.
4118   if (ShAmt &&
4119       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4120                               LDST->getAddressSpace(), ShAmt / 8))
4121     return false;
4122
4123   // It's not possible to generate a constant of extended or untyped type.
4124   EVT PtrType = LDST->getBasePtr().getValueType();
4125   if (PtrType == MVT::Untyped || PtrType.isExtended())
4126     return false;
4127
4128   if (isa<LoadSDNode>(LDST)) {
4129     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4130     // Don't transform one with multiple uses, this would require adding a new
4131     // load.
4132     if (!SDValue(Load, 0).hasOneUse())
4133       return false;
4134
4135     if (LegalOperations &&
4136         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4137       return false;
4138
4139     // For the transform to be legal, the load must produce only two values
4140     // (the value loaded and the chain).  Don't transform a pre-increment
4141     // load, for example, which produces an extra value.  Otherwise the
4142     // transformation is not equivalent, and the downstream logic to replace
4143     // uses gets things wrong.
4144     if (Load->getNumValues() > 2)
4145       return false;
4146
4147     // If the load that we're shrinking is an extload and we're not just
4148     // discarding the extension we can't simply shrink the load. Bail.
4149     // TODO: It would be possible to merge the extensions in some cases.
4150     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4151         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4152       return false;
4153
         // Give the target the final say on whether narrowing is worthwhile.
4154     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4155       return false;
4156   } else {
4157     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4158     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4159     // Can't write outside the original store
4160     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4161       return false;
4162
         // Once operations must be legal, the truncating store from the stored
         // value's type to MemVT has to be supported by the target.
4163     if (LegalOperations &&
4164         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4165       return false;
4166   }
4167   return true;
4168 }
4169
     /// Walk the operands of \p N, recursing through single-use OR/XOR/AND
     /// operands, looking for loads that can be narrowed under the AND mask
     /// \p Mask.
     ///
     /// Loads to narrow are collected in \p Loads. OR/XOR nodes that have a
     /// constant operand with bits set outside \p Mask are collected in
     /// \p NodesWithConsts. At most one other operand node may be recorded in
     /// \p NodeToMask; it must produce a single data result.
     /// \returns false if an operand is a vector, has more than one use, is a load
     /// that cannot be narrowed, or would require a second node to be masked.
4170 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4171                                     SmallPtrSetImpl<LoadSDNode*> &Loads,
4172                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4173                                     ConstantSDNode *Mask,
4174                                     SDNode *&NodeToMask) {
4175   // Recursively search for the operands, looking for loads which can be
4176   // narrowed.
4177   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4178     SDValue Op = N->getOperand(i);
4179
4180     if (Op.getValueType().isVector())
4181       return false;
4182
4183     // Some constants may need fixing up later if they are too large.
4184     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4185       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4186           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4187         NodesWithConsts.insert(N);
4188       continue;
4189     }
4190
         // Constants were handled above; every other operand must have one use.
4191     if (!Op.hasOneUse())
4192       return false;
4193
4194     switch(Op.getOpcode()) {
4195     case ISD::LOAD: {
4196       auto *Load = cast<LoadSDNode>(Op);
4197       EVT ExtVT;
4198       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4199           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4200
4201         // ZEXTLOAD is already small enough.
4202         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4203             ExtVT.bitsGE(Load->getMemoryVT()))
4204           continue;
4205
4206         // Use LE to convert equal sized loads to zext.
4207         if (ExtVT.bitsLE(Load->getMemoryVT()))
4208           Loads.insert(Load);
4209
4210         continue;
4211       }
4212       return false;
4213     }
4214     case ISD::ZERO_EXTEND:
4215     case ISD::AssertZext: {
4216       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4217       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4218       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4219         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4220         Op.getOperand(0).getValueType();
4221
4222       // We can accept extending nodes if the mask is wider or an equal
4223       // width to the original type.
4224       if (ExtVT.bitsGE(VT))
4225         continue;
4226       break;
4227     }
4228     case ISD::OR:
4229     case ISD::XOR:
4230     case ISD::AND:
4231       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4232                              NodeToMask))
4233         return false;
4234       continue;
4235     }
4236
         // Reaching here means Op is neither a narrowable load, an accepted
         // extension, nor a logic op that was searched recursively.
4237     // Allow one node which will be masked along with any loads found.
4238     if (NodeToMask)
4239       return false;
4240
4241     // Also ensure that the node to be masked only produces one data result.
4242     NodeToMask = Op.getNode();
4243     if (NodeToMask->getNumValues() > 1) {
4244       bool HasValue = false;
4245       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4246         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
             // Glue and chain (MVT::Other) results do not count as data results.
4247         if (VT != MVT::Glue && VT != MVT::Other) {
4248           if (HasValue) {
4249             NodeToMask = nullptr;
4250             return false;
4251           }
4252           HasValue = true;
4253         }
4254       }
4255       assert(HasValue && "Node to be masked has no data result?");
4256     }
4257   }
4258   return true;
4259 }
4260
     /// Try to push the constant mask of the AND node \p N back onto the loads
     /// that feed it, so that those loads can be narrowed and \p N itself removed.
     ///
     /// Requires operand 1 of \p N to be a constant satisfying APInt::isMask() and
     /// operand 0 not to be a load. When SearchForAndLoads succeeds and finds at
     /// least one load, the mask is applied to the optional fix-up node, to the
     /// recorded logic nodes with constants, and to each load (which is then
     /// narrowed via ReduceLoadWidth); finally all uses of \p N are replaced by
     /// its first operand.
     /// \returns true if the DAG was changed, false otherwise.
4261 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4262   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4263   if (!Mask)
4264     return false;
4265
4266   if (!Mask->getAPIntValue().isMask())
4267     return false;
4268
4269   // No need to do anything if the and directly uses a load.
4270   if (isa<LoadSDNode>(N->getOperand(0)))
4271     return false;
4272
4273   SmallPtrSet<LoadSDNode*, 8> Loads;
4274   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4275   SDNode *FixupNode = nullptr;
4276   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
         // Nothing to narrow, so there is no benefit in rewriting the tree.
4277     if (Loads.size() == 0)
4278       return false;
4279
4280     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4281     SDValue MaskOp = N->getOperand(1);
4282
4283     // If it exists, fixup the single node we allow in the tree that needs
4284     // masking.
4285     if (FixupNode) {
4286       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4287       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4288                                 FixupNode->getValueType(0),
4289                                 SDValue(FixupNode, 0), MaskOp);
4290       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
           // The replacement above also rewrote the new AND's own use of
           // FixupNode, so point its operands back at FixupNode and the mask.
           // Skipped if getNode did not actually return an AND node.
4291       if (And.getOpcode() == ISD ::AND)
4292         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4293     }
4294
4295     // Narrow any constants that need it.
4296     for (auto *LogicN : NodesWithConsts) {
4297       SDValue Op0 = LogicN->getOperand(0);
4298       SDValue Op1 = LogicN->getOperand(1);
4299
           // Make Op1 the constant operand so that it is the one being masked.
4300       if (isa<ConstantSDNode>(Op0))
4301           std::swap(Op0, Op1);
4302
4303       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4304                                 Op1, MaskOp);
4305
4306       DAG.UpdateNodeOperands(LogicN, Op0, And);
4307     }
4308
4309     // Create narrow loads.
4310     for (auto *Load : Loads) {
4311       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4312       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4313                                 SDValue(Load, 0), MaskOp);
4314       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
           // As for the fix-up node: restore the new AND's own operands after the
           // replacement, and keep using the node UpdateNodeOperands returns.
4315       if (And.getOpcode() == ISD ::AND)
4316         And = SDValue(
4317             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4318       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4319       assert(NewLoad &&
4320              "Shouldn't be masking the load if it can't be narrowed");
4321       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4322     }
         // The mask has been applied further up the tree, so every use of the
         // original AND is replaced by its first operand.
4323     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4324     return true;
4325   }
4326   return false;
4327 }
4328
4329 // Unfold
4330 //    x &  (-1 'logical shift' y)
4331 // To
4332 //    (x 'opposite logical shift' y) 'logical shift' y
4333 // if it is better for performance.
     //
     // N must be an ISD::AND node. Either operand may be the single-use
     // (shl -1, y) / (srl -1, y) mask; the other operand is x. Returns the
     // outer shift node, or an empty SDValue if the target does not prefer
     // shifts or neither operand matches the mask pattern.
4334 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4335   assert(N->getOpcode() == ISD::AND);
4336
4337   SDValue N0 = N->getOperand(0);
4338   SDValue N1 = N->getOperand(1);
4339
4340   // Do we actually prefer shifts over mask?
4341   if (!TLI.preferShiftsToClearExtremeBits(N0))
4342     return SDValue();
4343
4344   // Try to match  (-1 '[outer] logical shift' y)
4345   unsigned OuterShift;
4346   unsigned InnerShift; // The opposite direction to the OuterShift.
4347   SDValue Y;           // Shift amount.
       // On success, records the mask's shift opcode, the opposite shift opcode
       // and the shift amount in OuterShift, InnerShift and Y.
4348   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4349     if (!M.hasOneUse())
4350       return false;
4351     OuterShift = M->getOpcode();
4352     if (OuterShift == ISD::SHL)
4353       InnerShift = ISD::SRL;
4354     else if (OuterShift == ISD::SRL)
4355       InnerShift = ISD::SHL;
4356     else
4357       return false;
4358     if (!isAllOnesConstant(M->getOperand(0)))
4359       return false;
4360     Y = M->getOperand(1);
4361     return true;
4362   };
4363
       // AND is commutative: try the mask on the right first, then on the left.
4364   SDValue X;
4365   if (matchMask(N1))
4366     X = N0;
4367   else if (matchMask(N0))
4368     X = N1;
4369   else
4370     return SDValue();
4371
4372   SDLoc DL(N);
4373   EVT VT = N->getValueType(0);
4374
4375   //     tmp = x   'opposite logical shift' y
4376   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4377   //     ret = tmp 'logical shift' y
4378   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4379
4380   return T1;
4381 }
4382
4383 SDValue DAGCombiner::visitAND(SDNode *N) {
4384   SDValue N0 = N->getOperand(0);
4385   SDValue N1 = N->getOperand(1);
4386   EVT VT = N1.getValueType();
4387
4388   // x & x --> x
4389   if (N0 == N1)
4390     return N0;
4391
4392   // fold vector ops
4393   if (VT.isVector()) {
4394     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4395       return FoldedVOp;
4396
4397     // fold (and x, 0) -> 0, vector edition
4398     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4399       // do not return N0, because undef node may exist in N0
4400       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4401                              SDLoc(N), N0.getValueType());
4402     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4403       // do not return N1, because undef node may exist in N1
4404       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4405                              SDLoc(N), N1.getValueType());
4406
4407     // fold (and x, -1) -> x, vector edition
4408     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4409       return N1;
4410     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4411       return N0;
4412   }
4413
4414   // fold (and c1, c2) -> c1&c2
4415   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4416   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4417   if (N0C && N1C && !N1C->isOpaque())
4418     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4419   // canonicalize constant to RHS
4420   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4421      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4422     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4423   // fold (and x, -1) -> x
4424   if (isAllOnesConstant(N1))
4425     return N0;
4426   // if (and x, c) is known to be zero, return 0
4427   unsigned BitWidth = VT.getScalarSizeInBits();
4428   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4429                                    APInt::getAllOnesValue(BitWidth)))
4430     return DAG.getConstant(0, SDLoc(N), VT);
4431
4432   if (SDValue NewSel = foldBinOpIntoSelect(N))
4433     return NewSel;
4434
4435   // reassociate and
4436   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4437     return RAND;
4438
4439   // Try to convert a constant mask AND into a shuffle clear mask.
4440   if (VT.isVector())
4441     if (SDValue Shuffle = XformToShuffleWithZero(N))
4442       return Shuffle;
4443
4444   // fold (and (or x, C), D) -> D if (C & D) == D
4445   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4446     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4447   };
4448   if (N0.getOpcode() == ISD::OR &&
4449       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4450     return N1;
4451   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4452   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4453     SDValue N0Op0 = N0.getOperand(0);
4454     APInt Mask = ~N1C->getAPIntValue();
4455     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4456     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4457       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4458                                  N0.getValueType(), N0Op0);
4459
4460       // Replace uses of the AND with uses of the Zero extend node.
4461       CombineTo(N, Zext);
4462
4463       // We actually want to replace all uses of the any_extend with the
4464       // zero_extend, to avoid duplicating things.  This will later cause this
4465       // AND to be folded.
4466       CombineTo(N0.getNode(), Zext);
4467       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4468     }
4469   }
4470   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4471   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4472   // already be zero by virtue of the width of the base type of the load.
4473   //
4474   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4475   // more cases.
4476   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4477        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4478        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4479        N0.getOperand(0).getResNo() == 0) ||
4480       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4481     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4482                                          N0 : N0.getOperand(0) );
4483
4484     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4485     // This can be a pure constant or a vector splat, in which case we treat the
4486     // vector as a scalar and use the splat value.
4487     APInt Constant = APInt::getNullValue(1);
4488     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4489       Constant = C->getAPIntValue();
4490     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4491       APInt SplatValue, SplatUndef;
4492       unsigned SplatBitSize;
4493       bool HasAnyUndefs;
4494       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4495                                              SplatBitSize, HasAnyUndefs);
4496       if (IsSplat) {
4497         // Undef bits can contribute to a possible optimisation if set, so
4498         // set them.
4499         SplatValue |= SplatUndef;
4500
4501         // The splat value may be something like "0x00FFFFFF", which means 0 for
4502         // the first vector value and FF for the rest, repeating. We need a mask
4503         // that will apply equally to all members of the vector, so AND all the
4504         // lanes of the constant together.
4505         EVT VT = Vector->getValueType(0);
4506         unsigned BitWidth = VT.getScalarSizeInBits();
4507
4508         // If the splat value has been compressed to a bitlength lower
4509         // than the size of the vector lane, we need to re-expand it to
4510         // the lane size.
4511         if (BitWidth > SplatBitSize)
4512           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4513                SplatBitSize < BitWidth;
4514                SplatBitSize = SplatBitSize * 2)
4515             SplatValue |= SplatValue.shl(SplatBitSize);
4516
4517         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4518         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4519         if (SplatBitSize % BitWidth == 0) {
4520           Constant = APInt::getAllOnesValue(BitWidth);
4521           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4522             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4523         }
4524       }
4525     }
4526
4527     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4528     // actually legal and isn't going to get expanded, else this is a false
4529     // optimisation.
4530     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4531                                                     Load->getValueType(0),
4532                                                     Load->getMemoryVT());
4533
4534     // Resize the constant to the same size as the original memory access before
4535     // extension. If it is still the AllOnesValue then this AND is completely
4536     // unneeded.
4537     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4538
4539     bool B;
4540     switch (Load->getExtensionType()) {
4541     default: B = false; break;
4542     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4543     case ISD::ZEXTLOAD:
4544     case ISD::NON_EXTLOAD: B = true; break;
4545     }
4546
4547     if (B && Constant.isAllOnesValue()) {
4548       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4549       // preserve semantics once we get rid of the AND.
4550       SDValue NewLoad(Load, 0);
4551
4552       // Fold the AND away. NewLoad may get replaced immediately.
4553       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4554
4555       if (Load->getExtensionType() == ISD::EXTLOAD) {
4556         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4557                               Load->getValueType(0), SDLoc(Load),
4558                               Load->getChain(), Load->getBasePtr(),
4559                               Load->getOffset(), Load->getMemoryVT(),
4560                               Load->getMemOperand());
4561         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4562         if (Load->getNumValues() == 3) {
4563           // PRE/POST_INC loads have 3 values.
4564           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4565                            NewLoad.getValue(2) };
4566           CombineTo(Load, To, 3, true);
4567         } else {
4568           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4569         }
4570       }
4571
4572       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4573     }
4574   }
4575
4576   // fold (and (load x), 255) -> (zextload x, i8)
4577   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4578   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4579   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4580                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4581                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4582     if (SDValue Res = ReduceLoadWidth(N)) {
4583       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4584         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4585
4586       AddToWorklist(N);
4587       CombineTo(LN0, Res, Res.getValue(1));
4588       return SDValue(N, 0);
4589     }
4590   }
4591
4592   if (Level >= AfterLegalizeTypes) {
4593     // Attempt to propagate the AND back up to the leaves which, if they're
4594     // loads, can be combined to narrow loads and the AND node can be removed.
4595     // Perform after legalization so that extend nodes will already be
4596     // combined into the loads.
4597     if (BackwardsPropagateMask(N, DAG)) {
4598       return SDValue(N, 0);
4599     }
4600   }
4601
4602   if (SDValue Combined = visitANDLike(N0, N1, N))
4603     return Combined;
4604
4605   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4606   if (N0.getOpcode() == N1.getOpcode())
4607     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4608       return Tmp;
4609
4610   // Masking the negated extension of a boolean is just the zero-extended
4611   // boolean:
4612   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4613   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4614   //
4615   // Note: the SimplifyDemandedBits fold below can make an information-losing
4616   // transform, and then we have no way to find this better fold.
4617   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4618     if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4619       SDValue SubRHS = N0.getOperand(1);
4620       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4621           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4622         return SubRHS;
4623       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4624           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4625         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4626     }
4627   }
4628
4629   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4630   // fold (and (sra)) -> (and (srl)) when possible.
4631   if (SimplifyDemandedBits(SDValue(N, 0)))
4632     return SDValue(N, 0);
4633
4634   // fold (zext_inreg (extload x)) -> (zextload x)
4635   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4636     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4637     EVT MemVT = LN0->getMemoryVT();
4638     // If we zero all the possible extended bits, then we can turn this into
4639     // a zextload if we are running before legalize or the operation is legal.
4640     unsigned BitWidth = N1.getScalarValueSizeInBits();
4641     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4642                            BitWidth - MemVT.getScalarSizeInBits())) &&
4643         ((!LegalOperations && !LN0->isVolatile()) ||
4644          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4645       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4646                                        LN0->getChain(), LN0->getBasePtr(),
4647                                        MemVT, LN0->getMemOperand());
4648       AddToWorklist(N);
4649       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4650       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4651     }
4652   }
4653   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4654   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4655       N0.hasOneUse()) {
4656     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4657     EVT MemVT = LN0->getMemoryVT();
4658     // If we zero all the possible extended bits, then we can turn this into
4659     // a zextload if we are running before legalize or the operation is legal.
4660     unsigned BitWidth = N1.getScalarValueSizeInBits();
4661     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4662                            BitWidth - MemVT.getScalarSizeInBits())) &&
4663         ((!LegalOperations && !LN0->isVolatile()) ||
4664          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4665       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4666                                        LN0->getChain(), LN0->getBasePtr(),
4667                                        MemVT, LN0->getMemOperand());
4668       AddToWorklist(N);
4669       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4670       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4671     }
4672   }
4673   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4674   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4675     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4676                                            N0.getOperand(1), false))
4677       return BSwap;
4678   }
4679
4680   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4681     return Shifts;
4682
4683   return SDValue();
4684 }
4685
4686 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4687 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4688                                         bool DemandHighBits) {
4689   if (!LegalOperations)
4690     return SDValue();
4691
4692   EVT VT = N->getValueType(0);
4693   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4694     return SDValue();
4695   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4696     return SDValue();
4697
4698   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4699   bool LookPassAnd0 = false;
4700   bool LookPassAnd1 = false;
4701   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4702       std::swap(N0, N1);
4703   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4704       std::swap(N0, N1);
4705   if (N0.getOpcode() == ISD::AND) {
4706     if (!N0.getNode()->hasOneUse())
4707       return SDValue();
4708     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4709     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4710     // This is needed for X86.
4711     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4712                   N01C->getZExtValue() != 0xFFFF))
4713       return SDValue();
4714     N0 = N0.getOperand(0);
4715     LookPassAnd0 = true;
4716   }
4717
4718   if (N1.getOpcode() == ISD::AND) {
4719     if (!N1.getNode()->hasOneUse())
4720       return SDValue();
4721     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4722     if (!N11C || N11C->getZExtValue() != 0xFF)
4723       return SDValue();
4724     N1 = N1.getOperand(0);
4725     LookPassAnd1 = true;
4726   }
4727
4728   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4729     std::swap(N0, N1);
4730   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4731     return SDValue();
4732   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4733     return SDValue();
4734
4735   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4736   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4737   if (!N01C || !N11C)
4738     return SDValue();
4739   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4740     return SDValue();
4741
4742   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4743   SDValue N00 = N0->getOperand(0);
4744   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4745     if (!N00.getNode()->hasOneUse())
4746       return SDValue();
4747     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4748     if (!N001C || N001C->getZExtValue() != 0xFF)
4749       return SDValue();
4750     N00 = N00.getOperand(0);
4751     LookPassAnd0 = true;
4752   }
4753
4754   SDValue N10 = N1->getOperand(0);
4755   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4756     if (!N10.getNode()->hasOneUse())
4757       return SDValue();
4758     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4759     // Also allow 0xFFFF since the bits will be shifted out. This is needed
4760     // for X86.
4761     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4762                    N101C->getZExtValue() != 0xFFFF))
4763       return SDValue();
4764     N10 = N10.getOperand(0);
4765     LookPassAnd1 = true;
4766   }
4767
4768   if (N00 != N10)
4769     return SDValue();
4770
4771   // Make sure everything beyond the low halfword gets set to zero since the SRL
4772   // 16 will clear the top bits.
4773   unsigned OpSizeInBits = VT.getSizeInBits();
4774   if (DemandHighBits && OpSizeInBits > 16) {
4775     // If the left-shift isn't masked out then the only way this is a bswap is
4776     // if all bits beyond the low 8 are 0. In that case the entire pattern
4777     // reduces to a left shift anyway: leave it for other parts of the combiner.
4778     if (!LookPassAnd0)
4779       return SDValue();
4780
4781     // However, if the right shift isn't masked out then it might be because
4782     // it's not needed. See if we can spot that too.
4783     if (!LookPassAnd1 &&
4784         !DAG.MaskedValueIsZero(
4785             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4786       return SDValue();
4787   }
4788
4789   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4790   if (OpSizeInBits > 16) {
4791     SDLoc DL(N);
4792     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4793                       DAG.getConstant(OpSizeInBits - 16, DL,
4794                                       getShiftAmountTy(VT)));
4795   }
4796   return Res;
4797 }
4798
4799 /// Return true if the specified node is an element that makes up a 32-bit
4800 /// packed halfword byteswap.
4801 /// ((x & 0x000000ff) << 8) |
4802 /// ((x & 0x0000ff00) >> 8) |
4803 /// ((x & 0x00ff0000) << 8) |
4804 /// ((x & 0xff000000) >> 8)
4805 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4806   if (!N.getNode()->hasOneUse())
4807     return false;
4808
4809   unsigned Opc = N.getOpcode();
4810   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4811     return false;
4812
4813   SDValue N0 = N.getOperand(0);
4814   unsigned Opc0 = N0.getOpcode();
4815   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4816     return false;
4817
4818   ConstantSDNode *N1C = nullptr;
4819   // SHL or SRL: look upstream for AND mask operand
4820   if (Opc == ISD::AND)
4821     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4822   else if (Opc0 == ISD::AND)
4823     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4824   if (!N1C)
4825     return false;
4826
4827   unsigned MaskByteOffset;
4828   switch (N1C->getZExtValue()) {
4829   default:
4830     return false;
4831   case 0xFF:       MaskByteOffset = 0; break;
4832   case 0xFF00:     MaskByteOffset = 1; break;
4833   case 0xFFFF:
4834     // In case demanded bits didn't clear the bits that will be shifted out.
4835     // This is needed for X86.
4836     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4837       MaskByteOffset = 1;
4838       break;
4839     }
4840     return false;
4841   case 0xFF0000:   MaskByteOffset = 2; break;
4842   case 0xFF000000: MaskByteOffset = 3; break;
4843   }
4844
4845   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4846   if (Opc == ISD::AND) {
4847     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4848       // (x >> 8) & 0xff
4849       // (x >> 8) & 0xff0000
4850       if (Opc0 != ISD::SRL)
4851         return false;
4852       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4853       if (!C || C->getZExtValue() != 8)
4854         return false;
4855     } else {
4856       // (x << 8) & 0xff00
4857       // (x << 8) & 0xff000000
4858       if (Opc0 != ISD::SHL)
4859         return false;
4860       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4861       if (!C || C->getZExtValue() != 8)
4862         return false;
4863     }
4864   } else if (Opc == ISD::SHL) {
4865     // (x & 0xff) << 8
4866     // (x & 0xff0000) << 8
4867     if (MaskByteOffset != 0 && MaskByteOffset != 2)
4868       return false;
4869     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4870     if (!C || C->getZExtValue() != 8)
4871       return false;
4872   } else { // Opc == ISD::SRL
4873     // (x & 0xff00) >> 8
4874     // (x & 0xff000000) >> 8
4875     if (MaskByteOffset != 1 && MaskByteOffset != 3)
4876       return false;
4877     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4878     if (!C || C->getZExtValue() != 8)
4879       return false;
4880   }
4881
4882   if (Parts[MaskByteOffset])
4883     return false;
4884
4885   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4886   return true;
4887 }
4888
4889 /// Match a 32-bit packed halfword bswap. That is
4890 /// ((x & 0x000000ff) << 8) |
4891 /// ((x & 0x0000ff00) >> 8) |
4892 /// ((x & 0x00ff0000) << 8) |
4893 /// ((x & 0xff000000) >> 8)
4894 /// => (rotl (bswap x), 16)
4895 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4896   if (!LegalOperations)
4897     return SDValue();
4898
4899   EVT VT = N->getValueType(0);
4900   if (VT != MVT::i32)
4901     return SDValue();
4902   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4903     return SDValue();
4904
4905   // Look for either
4906   // (or (or (and), (and)), (or (and), (and)))
4907   // (or (or (or (and), (and)), (and)), (and))
4908   if (N0.getOpcode() != ISD::OR)
4909     return SDValue();
4910   SDValue N00 = N0.getOperand(0);
4911   SDValue N01 = N0.getOperand(1);
4912   SDNode *Parts[4] = {};
4913
4914   if (N1.getOpcode() == ISD::OR &&
4915       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4916     // (or (or (and), (and)), (or (and), (and)))
4917     if (!isBSwapHWordElement(N00, Parts))
4918       return SDValue();
4919
4920     if (!isBSwapHWordElement(N01, Parts))
4921       return SDValue();
4922     SDValue N10 = N1.getOperand(0);
4923     if (!isBSwapHWordElement(N10, Parts))
4924       return SDValue();
4925     SDValue N11 = N1.getOperand(1);
4926     if (!isBSwapHWordElement(N11, Parts))
4927       return SDValue();
4928   } else {
4929     // (or (or (or (and), (and)), (and)), (and))
4930     if (!isBSwapHWordElement(N1, Parts))
4931       return SDValue();
4932     if (!isBSwapHWordElement(N01, Parts))
4933       return SDValue();
4934     if (N00.getOpcode() != ISD::OR)
4935       return SDValue();
4936     SDValue N000 = N00.getOperand(0);
4937     if (!isBSwapHWordElement(N000, Parts))
4938       return SDValue();
4939     SDValue N001 = N00.getOperand(1);
4940     if (!isBSwapHWordElement(N001, Parts))
4941       return SDValue();
4942   }
4943
4944   // Make sure the parts are all coming from the same node.
4945   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4946     return SDValue();
4947
4948   SDLoc DL(N);
4949   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4950                               SDValue(Parts[0], 0));
4951
4952   // Result of the bswap should be rotated by 16. If it's not legal, then
4953   // do  (x << 16) | (x >> 16).
4954   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4955   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4956     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4957   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4958     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4959   return DAG.getNode(ISD::OR, DL, VT,
4960                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4961                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4962 }
4963
4964 /// This contains all DAGCombine rules which reduce two values combined by
4965 /// an Or operation to a single value \see visitANDLike().
4966 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4967   EVT VT = N1.getValueType();
4968   SDLoc DL(N);
4969
4970   // fold (or x, undef) -> -1
4971   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4972     return DAG.getAllOnesConstant(DL, VT);
4973
4974   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4975     return V;
4976
4977   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
4978   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4979       // Don't increase # computations.
4980       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4981     // We can only do this xform if we know that bits from X that are set in C2
4982     // but not in C1 are already zero.  Likewise for Y.
4983     if (const ConstantSDNode *N0O1C =
4984         getAsNonOpaqueConstant(N0.getOperand(1))) {
4985       if (const ConstantSDNode *N1O1C =
4986           getAsNonOpaqueConstant(N1.getOperand(1))) {
4987         // We can only do this xform if we know that bits from X that are set in
4988         // C2 but not in C1 are already zero.  Likewise for Y.
4989         const APInt &LHSMask = N0O1C->getAPIntValue();
4990         const APInt &RHSMask = N1O1C->getAPIntValue();
4991
4992         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4993             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4994           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4995                                   N0.getOperand(0), N1.getOperand(0));
4996           return DAG.getNode(ISD::AND, DL, VT, X,
4997                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
4998         }
4999       }
5000     }
5001   }
5002
5003   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5004   if (N0.getOpcode() == ISD::AND &&
5005       N1.getOpcode() == ISD::AND &&
5006       N0.getOperand(0) == N1.getOperand(0) &&
5007       // Don't increase # computations.
5008       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5009     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5010                             N0.getOperand(1), N1.getOperand(1));
5011     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5012   }
5013
5014   return SDValue();
5015 }
5016
5017 SDValue DAGCombiner::visitOR(SDNode *N) {
5018   SDValue N0 = N->getOperand(0);
5019   SDValue N1 = N->getOperand(1);
5020   EVT VT = N1.getValueType();
5021
5022   // x | x --> x
5023   if (N0 == N1)
5024     return N0;
5025
5026   // fold vector ops
5027   if (VT.isVector()) {
5028     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5029       return FoldedVOp;
5030
5031     // fold (or x, 0) -> x, vector edition
5032     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5033       return N1;
5034     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5035       return N0;
5036
5037     // fold (or x, -1) -> -1, vector edition
5038     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5039       // do not return N0, because undef node may exist in N0
5040       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5041     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5042       // do not return N1, because undef node may exist in N1
5043       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5044
5045     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5046     // Do this only if the resulting shuffle is legal.
5047     if (isa<ShuffleVectorSDNode>(N0) &&
5048         isa<ShuffleVectorSDNode>(N1) &&
5049         // Avoid folding a node with illegal type.
5050         TLI.isTypeLegal(VT)) {
5051       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5052       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5053       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5054       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5055       // Ensure both shuffles have a zero input.
5056       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5057         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5058         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5059         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5060         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5061         bool CanFold = true;
5062         int NumElts = VT.getVectorNumElements();
5063         SmallVector<int, 4> Mask(NumElts);
5064
5065         for (int i = 0; i != NumElts; ++i) {
5066           int M0 = SV0->getMaskElt(i);
5067           int M1 = SV1->getMaskElt(i);
5068
5069           // Determine if either index is pointing to a zero vector.
5070           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5071           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5072
5073           // If one element is zero and the otherside is undef, keep undef.
5074           // This also handles the case that both are undef.
5075           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5076             Mask[i] = -1;
5077             continue;
5078           }
5079
5080           // Make sure only one of the elements is zero.
5081           if (M0Zero == M1Zero) {
5082             CanFold = false;
5083             break;
5084           }
5085
5086           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5087
5088           // We have a zero and non-zero element. If the non-zero came from
5089           // SV0 make the index a LHS index. If it came from SV1, make it
5090           // a RHS index. We need to mod by NumElts because we don't care
5091           // which operand it came from in the original shuffles.
5092           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5093         }
5094
5095         if (CanFold) {
5096           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5097           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5098
5099           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5100           if (!LegalMask) {
5101             std::swap(NewLHS, NewRHS);
5102             ShuffleVectorSDNode::commuteMask(Mask);
5103             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5104           }
5105
5106           if (LegalMask)
5107             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5108         }
5109       }
5110     }
5111   }
5112
5113   // fold (or c1, c2) -> c1|c2
5114   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5115   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5116   if (N0C && N1C && !N1C->isOpaque())
5117     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5118   // canonicalize constant to RHS
5119   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5120      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5121     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5122   // fold (or x, 0) -> x
5123   if (isNullConstant(N1))
5124     return N0;
5125   // fold (or x, -1) -> -1
5126   if (isAllOnesConstant(N1))
5127     return N1;
5128
5129   if (SDValue NewSel = foldBinOpIntoSelect(N))
5130     return NewSel;
5131
5132   // fold (or x, c) -> c iff (x & ~c) == 0
5133   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5134     return N1;
5135
5136   if (SDValue Combined = visitORLike(N0, N1, N))
5137     return Combined;
5138
5139   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5140   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5141     return BSwap;
5142   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5143     return BSwap;
5144
5145   // reassociate or
5146   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5147     return ROR;
5148
5149   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5150   // iff (c1 & c2) != 0.
5151   auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5152     return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
5153   };
5154   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5155       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
5156     if (SDValue COR = DAG.FoldConstantArithmetic(
5157             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5158       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5159       AddToWorklist(IOR.getNode());
5160       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5161     }
5162   }
5163
5164   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5165   if (N0.getOpcode() == N1.getOpcode())
5166     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5167       return Tmp;
5168
5169   // See if this is some rotate idiom.
5170   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5171     return SDValue(Rot, 0);
5172
5173   if (SDValue Load = MatchLoadCombine(N))
5174     return Load;
5175
5176   // Simplify the operands using demanded-bits information.
5177   if (SimplifyDemandedBits(SDValue(N, 0)))
5178     return SDValue(N, 0);
5179
5180   return SDValue();
5181 }
5182
5183 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5184   if (Op.getOpcode() == ISD::AND &&
5185       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5186     Mask = Op.getOperand(1);
5187     return Op.getOperand(0);
5188   }
5189   return Op;
5190 }
5191
5192 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5193 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5194                             SDValue &Mask) {
5195   Op = stripConstantMask(DAG, Op, Mask);
5196   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5197     Shift = Op;
5198     return true;
5199   }
5200   return false;
5201 }
5202
/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
///
/// \param OppShift     The shl/srl already matched on the opposite side of the
///                     OR; must be non-empty (asserted).
/// \param ExtractFrom  The OR operand to extract the missing shift from; a
///                     constant AND mask on it is stripped first.
/// \param Mask         Receives the constant mask stripped from
///                     \p ExtractFrom, if there was one.  Note that it is
///                     written before any of the bail-out checks below, so it
///                     may be set even when an empty SDValue is returned.
/// \param DL           Debug location used for the newly created nodes.
///
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (mul v c0) (shrl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
///
///   (or (shl v c0) (shrl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (shrl v c0) (shl (shrl v c1) c2)):
///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  // Look through a constant AND mask; the mask itself is handed back to the
  // caller through Mask.
  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.  An existing srl needs a
  // shl (or mul) partner; an existing shl needs a srl (or udiv) partner.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these values
  // Check that we have constant values.  Besides requiring all three to be
  // constants (or constant splats), this also rejects any of them being zero
  // (APInt's operator! tests for a zero value).
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  // In terms of the patterns above this is c3 = bitwidth - c2.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Using the names from the patterns above (c0 = ExtractFromAmt,
    // c1 = OppLHSAmt, c2 = existing shift amount), check:
    //     c0 / (1 << (bitwidth - c2)) == c1
    //     c0 % (1 << (bitwidth - c2)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // With the same naming (c0 = ExtractFromAmt, c1 = OppLHSAmt), check:
    //      c0 - (bitwidth - c2) == c1
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  // The new shift amount reuses the value type of the existing shift's amount
  // operand, and the new shift is applied to the existing shift's LHS.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}
5316
5317 // Return true if we can prove that, whenever Neg and Pos are both in the
5318 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5319 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5320 //
5321 //     (or (shift1 X, Neg), (shift2 X, Pos))
5322 //
5323 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5324 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5325 // to consider shift amounts with defined behavior.
5326 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5327                            SelectionDAG &DAG) {
5328   // If EltSize is a power of 2 then:
5329   //
5330   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5331   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5332   //
5333   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5334   // for the stronger condition:
5335   //
5336   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
5337   //
5338   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5339   // we can just replace Neg with Neg' for the rest of the function.
5340   //
5341   // In other cases we check for the even stronger condition:
5342   //
5343   //     Neg == EltSize - Pos                                    [B]
5344   //
5345   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
5346   // behavior if Pos == 0 (and consequently Neg == EltSize).
5347   //
5348   // We could actually use [A] whenever EltSize is a power of 2, but the
5349   // only extra cases that it would match are those uninteresting ones
5350   // where Neg and Pos are never in range at the same time.  E.g. for
5351   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5352   // as well as (sub 32, Pos), but:
5353   //
5354   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5355   //
5356   // always invokes undefined behavior for 32-bit X.
5357   //
5358   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5359   unsigned MaskLoBits = 0;
5360   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5361     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5362       KnownBits Known;
5363       DAG.computeKnownBits(Neg.getOperand(0), Known);
5364       unsigned Bits = Log2_64(EltSize);
5365       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5366           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5367         Neg = Neg.getOperand(0);
5368         MaskLoBits = Bits;
5369       }
5370     }
5371   }
5372
5373   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5374   if (Neg.getOpcode() != ISD::SUB)
5375     return false;
5376   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5377   if (!NegC)
5378     return false;
5379   SDValue NegOp1 = Neg.getOperand(1);
5380
5381   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5382   // Pos'.  The truncation is redundant for the purpose of the equality.
5383   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5384     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5385       KnownBits Known;
5386       DAG.computeKnownBits(Pos.getOperand(0), Known);
5387       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5388           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5389            MaskLoBits))
5390         Pos = Pos.getOperand(0);
5391     }
5392   }
5393
5394   // The condition we need is now:
5395   //
5396   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5397   //
5398   // If NegOp1 == Pos then we need:
5399   //
5400   //              EltSize & Mask == NegC & Mask
5401   //
5402   // (because "x & Mask" is a truncation and distributes through subtraction).
5403   APInt Width;
5404   if (Pos == NegOp1)
5405     Width = NegC->getAPIntValue();
5406
5407   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5408   // Then the condition we want to prove becomes:
5409   //
5410   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5411   //
5412   // which, again because "x & Mask" is a truncation, becomes:
5413   //
5414   //                NegC & Mask == (EltSize - PosC) & Mask
5415   //             EltSize & Mask == (NegC + PosC) & Mask
5416   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5417     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5418       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5419     else
5420       return false;
5421   } else
5422     return false;
5423
5424   // Now we just need to check that EltSize & Mask == Width & Mask.
5425   if (MaskLoBits)
5426     // EltSize & Mask is 0 since Mask is EltSize - 1.
5427     return Width.getLoBits(MaskLoBits) == 0;
5428   return Width == EltSize;
5429 }
5430
5431 // A subroutine of MatchRotate used once we have found an OR of two opposite
5432 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5433 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5434 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5435 // Neg with outer conversions stripped away.
5436 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5437                                        SDValue Neg, SDValue InnerPos,
5438                                        SDValue InnerNeg, unsigned PosOpcode,
5439                                        unsigned NegOpcode, const SDLoc &DL) {
5440   // fold (or (shl x, (*ext y)),
5441   //          (srl x, (*ext (sub 32, y)))) ->
5442   //   (rotl x, y) or (rotr x, (sub 32, y))
5443   //
5444   // fold (or (shl x, (*ext (sub 32, y))),
5445   //          (srl x, (*ext y))) ->
5446   //   (rotr x, y) or (rotl x, (sub 32, y))
5447   EVT VT = Shifted.getValueType();
5448   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5449     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5450     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5451                        HasPos ? Pos : Neg).getNode();
5452   }
5453
5454   return nullptr;
5455 }
5456
5457 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
5458 // idioms for rotate, and if the target supports rotation instructions, generate
5459 // a rot[lr].
5460 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5461   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
5462   EVT VT = LHS.getValueType();
5463   if (!TLI.isTypeLegal(VT)) return nullptr;
5464
5465   // The target must have at least one rotate flavor.
5466   bool HasROTL = hasOperation(ISD::ROTL, VT);
5467   bool HasROTR = hasOperation(ISD::ROTR, VT);
5468   if (!HasROTL && !HasROTR) return nullptr;
5469
5470   // Check for truncated rotate.
5471   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5472       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5473     assert(LHS.getValueType() == RHS.getValueType());
5474     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5475       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5476                          SDValue(Rot, 0)).getNode();
5477     }
5478   }
5479
5480   // Match "(X shl/srl V1) & V2" where V2 may not be present.
5481   SDValue LHSShift;   // The shift.
5482   SDValue LHSMask;    // AND value if any.
5483   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5484
5485   SDValue RHSShift;   // The shift.
5486   SDValue RHSMask;    // AND value if any.
5487   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5488
5489   // If neither side matched a rotate half, bail
5490   if (!LHSShift && !RHSShift)
5491     return nullptr;
5492
5493   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5494   // side of the rotate, so try to handle that here. In all cases we need to
5495   // pass the matched shift from the opposite side to compute the opcode and
5496   // needed shift amount to extract.  We still want to do this if both sides
5497   // matched a rotate half because one half may be a potential overshift that
5498   // can be broken down (ie if InstCombine merged two shl or srl ops into a
5499   // single one).
5500
5501   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5502   if (LHSShift)
5503     if (SDValue NewRHSShift =
5504             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5505       RHSShift = NewRHSShift;
5506   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5507   if (RHSShift)
5508     if (SDValue NewLHSShift =
5509             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5510       LHSShift = NewLHSShift;
5511
5512   // If a side is still missing, nothing else we can do.
5513   if (!RHSShift || !LHSShift)
5514     return nullptr;
5515
5516   // At this point we've matched or extracted a shift op on each side.
5517
5518   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5519     return nullptr;   // Not shifting the same value.
5520
5521   if (LHSShift.getOpcode() == RHSShift.getOpcode())
5522     return nullptr;   // Shifts must disagree.
5523
5524   // Canonicalize shl to left side in a shl/srl pair.
5525   if (RHSShift.getOpcode() == ISD::SHL) {
5526     std::swap(LHS, RHS);
5527     std::swap(LHSShift, RHSShift);
5528     std::swap(LHSMask, RHSMask);
5529   }
5530
5531   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5532   SDValue LHSShiftArg = LHSShift.getOperand(0);
5533   SDValue LHSShiftAmt = LHSShift.getOperand(1);
5534   SDValue RHSShiftArg = RHSShift.getOperand(0);
5535   SDValue RHSShiftAmt = RHSShift.getOperand(1);
5536
5537   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5538   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5539   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5540                                         ConstantSDNode *RHS) {
5541     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5542   };
5543   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5544     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5545                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5546
5547     // If there is an AND of either shifted operand, apply it to the result.
5548     if (LHSMask.getNode() || RHSMask.getNode()) {
5549       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5550       SDValue Mask = AllOnes;
5551
5552       if (LHSMask.getNode()) {
5553         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5554         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5555                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5556       }
5557       if (RHSMask.getNode()) {
5558         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5559         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5560                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5561       }
5562
5563       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5564     }
5565
5566     return Rot.getNode();
5567   }
5568
5569   // If there is a mask here, and we have a variable shift, we can't be sure
5570   // that we're masking out the right stuff.
5571   if (LHSMask.getNode() || RHSMask.getNode())
5572     return nullptr;
5573
5574   // If the shift amount is sign/zext/any-extended just peel it off.
5575   SDValue LExtOp0 = LHSShiftAmt;
5576   SDValue RExtOp0 = RHSShiftAmt;
5577   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5578        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5579        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5580        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5581       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5582        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5583        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5584        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5585     LExtOp0 = LHSShiftAmt.getOperand(0);
5586     RExtOp0 = RHSShiftAmt.getOperand(0);
5587   }
5588
5589   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5590                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5591   if (TryL)
5592     return TryL;
5593
5594   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5595                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5596   if (TryR)
5597     return TryR;
5598
5599   return nullptr;
5600 }
5601
5602 namespace {
5603
5604 /// Represents known origin of an individual byte in load combine pattern. The
5605 /// value of the byte is either constant zero or comes from memory.
5606 struct ByteProvider {
5607   // For constant zero providers Load is set to nullptr. For memory providers
5608   // Load represents the node which loads the byte from memory.
5609   // ByteOffset is the offset of the byte in the value produced by the load.
5610   LoadSDNode *Load = nullptr;
5611   unsigned ByteOffset = 0;
5612
5613   ByteProvider() = default;
5614
5615   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5616     return ByteProvider(Load, ByteOffset);
5617   }
5618
5619   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5620
5621   bool isConstantZero() const { return !Load; }
5622   bool isMemory() const { return Load; }
5623
5624   bool operator==(const ByteProvider &Other) const {
5625     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5626   }
5627
5628 private:
5629   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5630       : Load(Load), ByteOffset(ByteOffset) {}
5631 };
5632
5633 } // end anonymous namespace
5634
5635 /// Recursively traverses the expression calculating the origin of the requested
5636 /// byte of the given value. Returns None if the provider can't be calculated.
5637 ///
5638 /// For all the values except the root of the expression verifies that the value
5639 /// has exactly one use and if it's not true return None. This way if the origin
5640 /// of the byte is returned it's guaranteed that the values which contribute to
5641 /// the byte are not used outside of this expression.
5642 ///
5643 /// Because the parts of the expression are not allowed to have more than one
5644 /// use this function iterates over trees, not DAGs. So it never visits the same
5645 /// node more than once.
5646 static const Optional<ByteProvider>
5647 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5648                       bool Root = false) {
5649   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5650   if (Depth == 10)
5651     return None;
5652
5653   if (!Root && !Op.hasOneUse())
5654     return None;
5655
5656   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5657   unsigned BitWidth = Op.getValueSizeInBits();
5658   if (BitWidth % 8 != 0)
5659     return None;
5660   unsigned ByteWidth = BitWidth / 8;
5661   assert(Index < ByteWidth && "invalid index requested");
5662   (void) ByteWidth;
5663
5664   switch (Op.getOpcode()) {
5665   case ISD::OR: {
5666     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5667     if (!LHS)
5668       return None;
5669     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5670     if (!RHS)
5671       return None;
5672
5673     if (LHS->isConstantZero())
5674       return RHS;
5675     if (RHS->isConstantZero())
5676       return LHS;
5677     return None;
5678   }
5679   case ISD::SHL: {
5680     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5681     if (!ShiftOp)
5682       return None;
5683
5684     uint64_t BitShift = ShiftOp->getZExtValue();
5685     if (BitShift % 8 != 0)
5686       return None;
5687     uint64_t ByteShift = BitShift / 8;
5688
5689     return Index < ByteShift
5690                ? ByteProvider::getConstantZero()
5691                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5692                                        Depth + 1);
5693   }
5694   case ISD::ANY_EXTEND:
5695   case ISD::SIGN_EXTEND:
5696   case ISD::ZERO_EXTEND: {
5697     SDValue NarrowOp = Op->getOperand(0);
5698     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5699     if (NarrowBitWidth % 8 != 0)
5700       return None;
5701     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5702
5703     if (Index >= NarrowByteWidth)
5704       return Op.getOpcode() == ISD::ZERO_EXTEND
5705                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5706                  : None;
5707     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5708   }
5709   case ISD::BSWAP:
5710     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5711                                  Depth + 1);
5712   case ISD::LOAD: {
5713     auto L = cast<LoadSDNode>(Op.getNode());
5714     if (L->isVolatile() || L->isIndexed())
5715       return None;
5716
5717     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5718     if (NarrowBitWidth % 8 != 0)
5719       return None;
5720     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5721
5722     if (Index >= NarrowByteWidth)
5723       return L->getExtensionType() == ISD::ZEXTLOAD
5724                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5725                  : None;
5726     return ByteProvider::getMemory(L, Index);
5727   }
5728   }
5729
5730   return None;
5731 }
5732
5733 /// Match a pattern where a wide type scalar value is loaded by several narrow
5734 /// loads and combined by shifts and ors. Fold it into a single load or a load
5735 /// and a BSWAP if the targets supports it.
5736 ///
5737 /// Assuming little endian target:
5738 ///  i8 *a = ...
5739 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5740 /// =>
5741 ///  i32 val = *((i32)a)
5742 ///
5743 ///  i8 *a = ...
5744 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5745 /// =>
5746 ///  i32 val = BSWAP(*((i32)a))
5747 ///
5748 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5749 /// interact well with the worklist mechanism. When a part of the pattern is
5750 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5751 /// but the root node of the pattern which triggers the load combine is not
5752 /// necessarily a direct user of the changed node. For example, once the address
5753 /// of t28 load is reassociated load combine won't be triggered:
5754 ///             t25: i32 = add t4, Constant:i32<2>
5755 ///           t26: i64 = sign_extend t25
5756 ///        t27: i64 = add t2, t26
5757 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5758 ///     t29: i32 = zero_extend t28
5759 ///   t32: i32 = shl t29, Constant:i8<8>
5760 /// t33: i32 = or t23, t32
5761 /// As a possible fix visitLoad can check if the load can be a part of a load
5762 /// combine pattern and add corresponding OR roots to the worklist.
5763 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5764   assert(N->getOpcode() == ISD::OR &&
5765          "Can only match load combining against OR nodes");
5766
5767   // Handles simple types only
5768   EVT VT = N->getValueType(0);
5769   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5770     return SDValue();
5771   unsigned ByteWidth = VT.getSizeInBits() / 8;
5772
5773   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5774   // Before legalize we can introduce too wide illegal loads which will be later
5775   // split into legal sized loads. This enables us to combine i64 load by i8
5776   // patterns to a couple of i32 loads on 32 bit targets.
5777   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5778     return SDValue();
5779
5780   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5781     unsigned BW, unsigned i) { return i; };
5782   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5783     unsigned BW, unsigned i) { return BW - i - 1; };
5784
5785   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5786   auto MemoryByteOffset = [&] (ByteProvider P) {
5787     assert(P.isMemory() && "Must be a memory byte provider");
5788     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5789     assert(LoadBitWidth % 8 == 0 &&
5790            "can only analyze providers for individual bytes not bit");
5791     unsigned LoadByteWidth = LoadBitWidth / 8;
5792     return IsBigEndianTarget
5793             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5794             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5795   };
5796
5797   Optional<BaseIndexOffset> Base;
5798   SDValue Chain;
5799
5800   SmallPtrSet<LoadSDNode *, 8> Loads;
5801   Optional<ByteProvider> FirstByteProvider;
5802   int64_t FirstOffset = INT64_MAX;
5803
5804   // Check if all the bytes of the OR we are looking at are loaded from the same
5805   // base address. Collect bytes offsets from Base address in ByteOffsets.
5806   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5807   for (unsigned i = 0; i < ByteWidth; i++) {
5808     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5809     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5810       return SDValue();
5811
5812     LoadSDNode *L = P->Load;
5813     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5814            "Must be enforced by calculateByteProvider");
5815     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5816
5817     // All loads must share the same chain
5818     SDValue LChain = L->getChain();
5819     if (!Chain)
5820       Chain = LChain;
5821     else if (Chain != LChain)
5822       return SDValue();
5823
5824     // Loads must share the same base address
5825     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5826     int64_t ByteOffsetFromBase = 0;
5827     if (!Base)
5828       Base = Ptr;
5829     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5830       return SDValue();
5831
5832     // Calculate the offset of the current byte from the base address
5833     ByteOffsetFromBase += MemoryByteOffset(*P);
5834     ByteOffsets[i] = ByteOffsetFromBase;
5835
5836     // Remember the first byte load
5837     if (ByteOffsetFromBase < FirstOffset) {
5838       FirstByteProvider = P;
5839       FirstOffset = ByteOffsetFromBase;
5840     }
5841
5842     Loads.insert(L);
5843   }
5844   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5845          "memory, so there must be at least one load which produces the value");
5846   assert(Base && "Base address of the accessed memory location must be set");
5847   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5848
5849   // Check if the bytes of the OR we are looking at match with either big or
5850   // little endian value load
5851   bool BigEndian = true, LittleEndian = true;
5852   for (unsigned i = 0; i < ByteWidth; i++) {
5853     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5854     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5855     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5856     if (!BigEndian && !LittleEndian)
5857       return SDValue();
5858   }
5859   assert((BigEndian != LittleEndian) && "should be either or");
5860   assert(FirstByteProvider && "must be set");
5861
5862   // Ensure that the first byte is loaded from zero offset of the first load.
5863   // So the combined value can be loaded from the first load address.
5864   if (MemoryByteOffset(*FirstByteProvider) != 0)
5865     return SDValue();
5866   LoadSDNode *FirstLoad = FirstByteProvider->Load;
5867
5868   // The node we are looking at matches with the pattern, check if we can
5869   // replace it with a single load and bswap if needed.
5870
5871   // If the load needs byte swap check if the target supports it
5872   bool NeedsBswap = IsBigEndianTarget != BigEndian;
5873
5874   // Before legalize we can introduce illegal bswaps which will be later
5875   // converted to an explicit bswap sequence. This way we end up with a single
5876   // load and byte shuffling instead of several loads and byte shuffling.
5877   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5878     return SDValue();
5879
5880   // Check that a load of the wide type is both allowed and fast on the target
5881   bool Fast = false;
5882   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5883                                         VT, FirstLoad->getAddressSpace(),
5884                                         FirstLoad->getAlignment(), &Fast);
5885   if (!Allowed || !Fast)
5886     return SDValue();
5887
5888   SDValue NewLoad =
5889       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5890                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5891
5892   // Transfer chain users from old loads to the new load.
5893   for (LoadSDNode *L : Loads)
5894     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5895
5896   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5897 }
5898
5899 // If the target has andn, bsl, or a similar bit-select instruction,
5900 // we want to unfold masked merge, with canonical pattern of:
5901 //   |        A  |  |B|
5902 //   ((x ^ y) & m) ^ y
5903 //    |  D  |
5904 // Into:
5905 //   (x & m) | (y & ~m)
5906 // If y is a constant, and the 'andn' does not work with immediates,
5907 // we unfold into a different pattern:
5908 //   ~(~x & m) & (m | y)
5909 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5910 //       the very least that breaks andnpd / andnps patterns, and because those
5911 //       patterns are simplified in IR and shouldn't be created in the DAG
5912 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
5913   assert(N->getOpcode() == ISD::XOR);
5914
5915   // Don't touch 'not' (i.e. where y = -1).
5916   if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
5917     return SDValue();
5918
5919   EVT VT = N->getValueType(0);
5920
5921   // There are 3 commutable operators in the pattern,
5922   // so we have to deal with 8 possible variants of the basic pattern.
5923   SDValue X, Y, M;
5924   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
5925     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
5926       return false;
5927     SDValue Xor = And.getOperand(XorIdx);
5928     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
5929       return false;
5930     SDValue Xor0 = Xor.getOperand(0);
5931     SDValue Xor1 = Xor.getOperand(1);
5932     // Don't touch 'not' (i.e. where y = -1).
5933     if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
5934       return false;
5935     if (Other == Xor0)
5936       std::swap(Xor0, Xor1);
5937     if (Other != Xor1)
5938       return false;
5939     X = Xor0;
5940     Y = Xor1;
5941     M = And.getOperand(XorIdx ? 0 : 1);
5942     return true;
5943   };
5944
5945   SDValue N0 = N->getOperand(0);
5946   SDValue N1 = N->getOperand(1);
5947   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
5948       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
5949     return SDValue();
5950
5951   // Don't do anything if the mask is constant. This should not be reachable.
5952   // InstCombine should have already unfolded this pattern, and DAGCombiner
5953   // probably shouldn't produce it, too.
5954   if (isa<ConstantSDNode>(M.getNode()))
5955     return SDValue();
5956
5957   // We can transform if the target has AndNot
5958   if (!TLI.hasAndNot(M))
5959     return SDValue();
5960
5961   SDLoc DL(N);
5962
5963   // If Y is a constant, check that 'andn' works with immediates.
5964   if (!TLI.hasAndNot(Y)) {
5965     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
5966     // If not, we need to do a bit more work to make sure andn is still used.
5967     SDValue NotX = DAG.getNOT(DL, X, VT);
5968     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
5969     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
5970     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
5971     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
5972   }
5973
5974   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
5975   SDValue NotM = DAG.getNOT(DL, M, VT);
5976   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
5977
5978   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
5979 }
5980
5981 SDValue DAGCombiner::visitXOR(SDNode *N) {
5982   SDValue N0 = N->getOperand(0);
5983   SDValue N1 = N->getOperand(1);
5984   EVT VT = N0.getValueType();
5985
5986   // fold vector ops
5987   if (VT.isVector()) {
5988     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5989       return FoldedVOp;
5990
5991     // fold (xor x, 0) -> x, vector edition
5992     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5993       return N1;
5994     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5995       return N0;
5996   }
5997
5998   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5999   if (N0.isUndef() && N1.isUndef())
6000     return DAG.getConstant(0, SDLoc(N), VT);
6001   // fold (xor x, undef) -> undef
6002   if (N0.isUndef())
6003     return N0;
6004   if (N1.isUndef())
6005     return N1;
6006   // fold (xor c1, c2) -> c1^c2
6007   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6008   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6009   if (N0C && N1C)
6010     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
6011   // canonicalize constant to RHS
6012   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6013      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6014     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
6015   // fold (xor x, 0) -> x
6016   if (isNullConstant(N1))
6017     return N0;
6018
6019   if (SDValue NewSel = foldBinOpIntoSelect(N))
6020     return NewSel;
6021
6022   // reassociate xor
6023   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1, N->getFlags()))
6024     return RXOR;
6025
6026   // fold !(x cc y) -> (x !cc y)
6027   SDValue LHS, RHS, CC;
6028   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6029     bool isInt = LHS.getValueType().isInteger();
6030     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6031                                                isInt);
6032
6033     if (!LegalOperations ||
6034         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6035       switch (N0.getOpcode()) {
6036       default:
6037         llvm_unreachable("Unhandled SetCC Equivalent!");
6038       case ISD::SETCC:
6039         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6040       case ISD::SELECT_CC:
6041         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6042                                N0.getOperand(3), NotCC);
6043       }
6044     }
6045   }
6046
6047   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6048   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
6049       N0.getNode()->hasOneUse() &&
6050       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6051     SDValue V = N0.getOperand(0);
6052     SDLoc DL(N0);
6053     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
6054                     DAG.getConstant(1, DL, V.getValueType()));
6055     AddToWorklist(V.getNode());
6056     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
6057   }
6058
6059   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6060   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6061       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6062     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6063     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6064       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6065       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6066       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6067       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6068       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6069     }
6070   }
6071   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6072   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6073       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6074     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6075     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6076       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6077       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6078       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6079       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6080       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6081     }
6082   }
6083   // fold (xor (and x, y), y) -> (and (not x), y)
6084   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6085       N0->getOperand(1) == N1) {
6086     SDValue X = N0->getOperand(0);
6087     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6088     AddToWorklist(NotX.getNode());
6089     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
6090   }
6091
6092   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6093   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6094     SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
6095     SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
6096     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6097       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6098       SDValue S0 = S.getOperand(0);
6099       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6100         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6101         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6102           if (C->getAPIntValue() == (OpSizeInBits - 1))
6103             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
6104       }
6105     }
6106   }
6107
6108   // fold (xor x, x) -> 0
6109   if (N0 == N1)
6110     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
6111
6112   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6113   // Here is a concrete example of this equivalence:
6114   // i16   x ==  14
6115   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
6116   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6117   //
6118   // =>
6119   //
6120   // i16     ~1      == 0b1111111111111110
6121   // i16 rol(~1, 14) == 0b1011111111111111
6122   //
6123   // Some additional tips to help conceptualize this transform:
6124   // - Try to see the operation as placing a single zero in a value of all ones.
6125   // - There exists no value for x which would allow the result to contain zero.
6126   // - Values of x larger than the bitwidth are undefined and do not require a
6127   //   consistent result.
6128   // - Pushing the zero left requires shifting one bits in from the right.
6129   // A rotate left of ~1 is a nice way of achieving the desired result.
6130   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
6131       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6132     SDLoc DL(N);
6133     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6134                        N0.getOperand(1));
6135   }
6136
6137   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
6138   if (N0.getOpcode() == N1.getOpcode())
6139     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
6140       return Tmp;
6141
6142   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
6143   if (SDValue MM = unfoldMaskedMerge(N))
6144     return MM;
6145
6146   // Simplify the expression using non-local knowledge.
6147   if (SimplifyDemandedBits(SDValue(N, 0)))
6148     return SDValue(N, 0);
6149
6150   return SDValue();
6151 }
6152
6153 /// Handle transforms common to the three shifts, when the shift amount is a
6154 /// constant.
6155 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6156   SDNode *LHS = N->getOperand(0).getNode();
6157   if (!LHS->hasOneUse()) return SDValue();
6158
6159   // We want to pull some binops through shifts, so that we have (and (shift))
6160   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
6161   // thing happens with address calculations, so it's important to canonicalize
6162   // it.
6163   bool HighBitSet = false;  // Can we transform this if the high bit is set?
6164
6165   switch (LHS->getOpcode()) {
6166   default: return SDValue();
6167   case ISD::OR:
6168   case ISD::XOR:
6169     HighBitSet = false; // We can only transform sra if the high bit is clear.
6170     break;
6171   case ISD::AND:
6172     HighBitSet = true;  // We can only transform sra if the high bit is set.
6173     break;
6174   case ISD::ADD:
6175     if (N->getOpcode() != ISD::SHL)
6176       return SDValue(); // only shl(add) not sr[al](add).
6177     HighBitSet = false; // We can only transform sra if the high bit is clear.
6178     break;
6179   }
6180
6181   // We require the RHS of the binop to be a constant and not opaque as well.
6182   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6183   if (!BinOpCst) return SDValue();
6184
6185   // FIXME: disable this unless the input to the binop is a shift by a constant
6186   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
6187   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6188   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6189                  BinOpLHSVal->getOpcode() == ISD::SRA ||
6190                  BinOpLHSVal->getOpcode() == ISD::SRL;
6191   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6192                         BinOpLHSVal->getOpcode() == ISD::SELECT;
6193
6194   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6195       !isCopyOrSelect)
6196     return SDValue();
6197
6198   if (isCopyOrSelect && N->hasOneUse())
6199     return SDValue();
6200
6201   EVT VT = N->getValueType(0);
6202
6203   // If this is a signed shift right, and the high bit is modified by the
6204   // logical operation, do not perform the transformation. The highBitSet
6205   // boolean indicates the value of the high bit of the constant which would
6206   // cause it to be modified for this operation.
6207   if (N->getOpcode() == ISD::SRA) {
6208     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6209     if (BinOpRHSSignSet != HighBitSet)
6210       return SDValue();
6211   }
6212
6213   if (!TLI.isDesirableToCommuteWithShift(N, Level))
6214     return SDValue();
6215
6216   // Fold the constants, shifting the binop RHS by the shift amount.
6217   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6218                                N->getValueType(0),
6219                                LHS->getOperand(1), N->getOperand(1));
6220   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6221
6222   // Create the new shift.
6223   SDValue NewShift = DAG.getNode(N->getOpcode(),
6224                                  SDLoc(LHS->getOperand(0)),
6225                                  VT, LHS->getOperand(0), N->getOperand(1));
6226
6227   // Create the new binop.
6228   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6229 }
6230
6231 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6232   assert(N->getOpcode() == ISD::TRUNCATE);
6233   assert(N->getOperand(0).getOpcode() == ISD::AND);
6234
6235   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6236   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6237     SDValue N01 = N->getOperand(0).getOperand(1);
6238     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6239       SDLoc DL(N);
6240       EVT TruncVT = N->getValueType(0);
6241       SDValue N00 = N->getOperand(0).getOperand(0);
6242       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6243       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6244       AddToWorklist(Trunc00.getNode());
6245       AddToWorklist(Trunc01.getNode());
6246       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6247     }
6248   }
6249
6250   return SDValue();
6251 }
6252
6253 SDValue DAGCombiner::visitRotate(SDNode *N) {
6254   SDLoc dl(N);
6255   SDValue N0 = N->getOperand(0);
6256   SDValue N1 = N->getOperand(1);
6257   EVT VT = N->getValueType(0);
6258   unsigned Bitsize = VT.getScalarSizeInBits();
6259
6260   // fold (rot x, 0) -> x
6261   if (isNullConstantOrNullSplatConstant(N1))
6262     return N0;
6263
6264   // fold (rot x, c) -> (rot x, c % BitSize)
6265   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6266     if (Cst->getAPIntValue().uge(Bitsize)) {
6267       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6268       return DAG.getNode(N->getOpcode(), dl, VT, N0,
6269                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
6270     }
6271   }
6272
6273   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6274   if (N1.getOpcode() == ISD::TRUNCATE &&
6275       N1.getOperand(0).getOpcode() == ISD::AND) {
6276     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6277       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6278   }
6279
6280   unsigned NextOp = N0.getOpcode();
6281   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6282   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6283     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6284     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6285     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6286       EVT ShiftVT = C1->getValueType(0);
6287       bool SameSide = (N->getOpcode() == NextOp);
6288       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6289       if (SDValue CombinedShift =
6290               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6291         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6292         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6293             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6294             BitsizeC.getNode());
6295         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6296                            CombinedShiftNorm);
6297       }
6298     }
6299   }
6300   return SDValue();
6301 }
6302
6303 SDValue DAGCombiner::visitSHL(SDNode *N) {
6304   SDValue N0 = N->getOperand(0);
6305   SDValue N1 = N->getOperand(1);
6306   EVT VT = N0.getValueType();
6307   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6308
6309   // fold vector ops
6310   if (VT.isVector()) {
6311     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6312       return FoldedVOp;
6313
6314     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6315     // If setcc produces all-one true value then:
6316     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6317     if (N1CV && N1CV->isConstant()) {
6318       if (N0.getOpcode() == ISD::AND) {
6319         SDValue N00 = N0->getOperand(0);
6320         SDValue N01 = N0->getOperand(1);
6321         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6322
6323         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6324             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6325                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6326           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6327                                                      N01CV, N1CV))
6328             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6329         }
6330       }
6331     }
6332   }
6333
6334   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6335
6336   // fold (shl c1, c2) -> c1<<c2
6337   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6338   if (N0C && N1C && !N1C->isOpaque())
6339     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6340   // fold (shl 0, x) -> 0
6341   if (isNullConstantOrNullSplatConstant(N0))
6342     return N0;
6343   // fold (shl x, c >= size(x)) -> undef
6344   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6345   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6346     return Val->getAPIntValue().uge(OpSizeInBits);
6347   };
6348   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6349     return DAG.getUNDEF(VT);
6350   // fold (shl x, 0) -> x
6351   if (N1C && N1C->isNullValue())
6352     return N0;
6353   // fold (shl undef, x) -> 0
6354   if (N0.isUndef())
6355     return DAG.getConstant(0, SDLoc(N), VT);
6356
6357   if (SDValue NewSel = foldBinOpIntoSelect(N))
6358     return NewSel;
6359
6360   // if (shl x, c) is known to be zero, return 0
6361   if (DAG.MaskedValueIsZero(SDValue(N, 0),
6362                             APInt::getAllOnesValue(OpSizeInBits)))
6363     return DAG.getConstant(0, SDLoc(N), VT);
6364   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6365   if (N1.getOpcode() == ISD::TRUNCATE &&
6366       N1.getOperand(0).getOpcode() == ISD::AND) {
6367     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6368       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6369   }
6370
6371   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6372     return SDValue(N, 0);
6373
6374   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
6375   if (N0.getOpcode() == ISD::SHL) {
6376     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6377                                           ConstantSDNode *RHS) {
6378       APInt c1 = LHS->getAPIntValue();
6379       APInt c2 = RHS->getAPIntValue();
6380       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6381       return (c1 + c2).uge(OpSizeInBits);
6382     };
6383     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6384       return DAG.getConstant(0, SDLoc(N), VT);
6385
6386     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6387                                        ConstantSDNode *RHS) {
6388       APInt c1 = LHS->getAPIntValue();
6389       APInt c2 = RHS->getAPIntValue();
6390       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6391       return (c1 + c2).ult(OpSizeInBits);
6392     };
6393     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6394       SDLoc DL(N);
6395       EVT ShiftVT = N1.getValueType();
6396       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6397       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6398     }
6399   }
6400
6401   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6402   // For this to be valid, the second form must not preserve any of the bits
6403   // that are shifted out by the inner shift in the first form.  This means
6404   // the outer shift size must be >= the number of bits added by the ext.
6405   // As a corollary, we don't care what kind of ext it is.
6406   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6407               N0.getOpcode() == ISD::ANY_EXTEND ||
6408               N0.getOpcode() == ISD::SIGN_EXTEND) &&
6409       N0.getOperand(0).getOpcode() == ISD::SHL) {
6410     SDValue N0Op0 = N0.getOperand(0);
6411     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6412       APInt c1 = N0Op0C1->getAPIntValue();
6413       APInt c2 = N1C->getAPIntValue();
6414       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6415
6416       EVT InnerShiftVT = N0Op0.getValueType();
6417       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6418       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6419         SDLoc DL(N0);
6420         APInt Sum = c1 + c2;
6421         if (Sum.uge(OpSizeInBits))
6422           return DAG.getConstant(0, DL, VT);
6423
6424         return DAG.getNode(
6425             ISD::SHL, DL, VT,
6426             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6427             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6428       }
6429     }
6430   }
6431
6432   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6433   // Only fold this if the inner zext has no other uses to avoid increasing
6434   // the total number of instructions.
6435   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6436       N0.getOperand(0).getOpcode() == ISD::SRL) {
6437     SDValue N0Op0 = N0.getOperand(0);
6438     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6439       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6440         uint64_t c1 = N0Op0C1->getZExtValue();
6441         uint64_t c2 = N1C->getZExtValue();
6442         if (c1 == c2) {
6443           SDValue NewOp0 = N0.getOperand(0);
6444           EVT CountVT = NewOp0.getOperand(1).getValueType();
6445           SDLoc DL(N);
6446           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6447                                        NewOp0,
6448                                        DAG.getConstant(c2, DL, CountVT));
6449           AddToWorklist(NewSHL.getNode());
6450           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6451         }
6452       }
6453     }
6454   }
6455
6456   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
6457   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
6458   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6459       N0->getFlags().hasExact()) {
6460     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6461       uint64_t C1 = N0C1->getZExtValue();
6462       uint64_t C2 = N1C->getZExtValue();
6463       SDLoc DL(N);
6464       if (C1 <= C2)
6465         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6466                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6467       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6468                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6469     }
6470   }
6471
6472   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6473   //                               (and (srl x, (sub c1, c2), MASK)
6474   // Only fold this if the inner shift has no other uses -- if it does, folding
6475   // this will increase the total number of instructions.
6476   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6477     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6478       uint64_t c1 = N0C1->getZExtValue();
6479       if (c1 < OpSizeInBits) {
6480         uint64_t c2 = N1C->getZExtValue();
6481         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6482         SDValue Shift;
6483         if (c2 > c1) {
6484           Mask <<= c2 - c1;
6485           SDLoc DL(N);
6486           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6487                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6488         } else {
6489           Mask.lshrInPlace(c1 - c2);
6490           SDLoc DL(N);
6491           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6492                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6493         }
6494         SDLoc DL(N0);
6495         return DAG.getNode(ISD::AND, DL, VT, Shift,
6496                            DAG.getConstant(Mask, DL, VT));
6497       }
6498     }
6499   }
6500
6501   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6502   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6503       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6504     SDLoc DL(N);
6505     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6506     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6507     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6508   }
6509
6510   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6511   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6512   // Variant of version done on multiply, except mul by a power of 2 is turned
6513   // into a shift.
6514   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6515       N0.getNode()->hasOneUse() &&
6516       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6517       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6518       TLI.isDesirableToCommuteWithShift(N, Level)) {
6519     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6520     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6521     AddToWorklist(Shl0.getNode());
6522     AddToWorklist(Shl1.getNode());
6523     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6524   }
6525
6526   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6527   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6528       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6529       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6530     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6531     if (isConstantOrConstantVector(Shl))
6532       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6533   }
6534
6535   if (N1C && !N1C->isOpaque())
6536     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6537       return NewSHL;
6538
6539   return SDValue();
6540 }
6541
6542 SDValue DAGCombiner::visitSRA(SDNode *N) {
6543   SDValue N0 = N->getOperand(0);
6544   SDValue N1 = N->getOperand(1);
6545   EVT VT = N0.getValueType();
6546   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6547
6548   // Arithmetic shifting an all-sign-bit value is a no-op.
6549   // fold (sra 0, x) -> 0
6550   // fold (sra -1, x) -> -1
6551   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6552     return N0;
6553
6554   // fold vector ops
6555   if (VT.isVector())
6556     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6557       return FoldedVOp;
6558
6559   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6560
6561   // fold (sra c1, c2) -> (sra c1, c2)
6562   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6563   if (N0C && N1C && !N1C->isOpaque())
6564     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
6565   // fold (sra x, c >= size(x)) -> undef
6566   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6567   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6568     return Val->getAPIntValue().uge(OpSizeInBits);
6569   };
6570   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6571     return DAG.getUNDEF(VT);
6572   // fold (sra x, 0) -> x
6573   if (N1C && N1C->isNullValue())
6574     return N0;
6575
6576   if (SDValue NewSel = foldBinOpIntoSelect(N))
6577     return NewSel;
6578
6579   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
6580   // sext_inreg.
6581   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
6582     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
6583     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
6584     if (VT.isVector())
6585       ExtVT = EVT::getVectorVT(*DAG.getContext(),
6586                                ExtVT, VT.getVectorNumElements());
6587     if ((!LegalOperations ||
6588          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
6589       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6590                          N0.getOperand(0), DAG.getValueType(ExtVT));
6591   }
6592
6593   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
6594   // clamp (add c1, c2) to max shift.
6595   if (N0.getOpcode() == ISD::SRA) {
6596     SDLoc DL(N);
6597     EVT ShiftVT = N1.getValueType();
6598     EVT ShiftSVT = ShiftVT.getScalarType();
6599     SmallVector<SDValue, 16> ShiftValues;
6600
6601     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6602       APInt c1 = LHS->getAPIntValue();
6603       APInt c2 = RHS->getAPIntValue();
6604       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6605       APInt Sum = c1 + c2;
6606       unsigned ShiftSum =
6607           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
6608       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
6609       return true;
6610     };
6611     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
6612       SDValue ShiftValue;
6613       if (VT.isVector())
6614         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
6615       else
6616         ShiftValue = ShiftValues[0];
6617       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
6618     }
6619   }
6620
6621   // fold (sra (shl X, m), (sub result_size, n))
6622   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
6623   // result_size - n != m.
6624   // If truncate is free for the target sext(shl) is likely to result in better
6625   // code.
6626   if (N0.getOpcode() == ISD::SHL && N1C) {
6627     // Get the two constanst of the shifts, CN0 = m, CN = n.
6628     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
6629     if (N01C) {
6630       LLVMContext &Ctx = *DAG.getContext();
6631       // Determine what the truncate's result bitsize and type would be.
6632       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
6633
6634       if (VT.isVector())
6635         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
6636
6637       // Determine the residual right-shift amount.
6638       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
6639
6640       // If the shift is not a no-op (in which case this should be just a sign
6641       // extend already), the truncated to type is legal, sign_extend is legal
6642       // on that type, and the truncate to that type is both legal and free,
6643       // perform the transform.
6644       if ((ShiftAmt > 0) &&
6645           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
6646           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
6647           TLI.isTruncateFree(VT, TruncVT)) {
6648         SDLoc DL(N);
6649         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
6650             getShiftAmountTy(N0.getOperand(0).getValueType()));
6651         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
6652                                     N0.getOperand(0), Amt);
6653         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
6654                                     Shift);
6655         return DAG.getNode(ISD::SIGN_EXTEND, DL,
6656                            N->getValueType(0), Trunc);
6657       }
6658     }
6659   }
6660
6661   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
6662   if (N1.getOpcode() == ISD::TRUNCATE &&
6663       N1.getOperand(0).getOpcode() == ISD::AND) {
6664     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6665       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
6666   }
6667
6668   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
6669   //      if c1 is equal to the number of bits the trunc removes
6670   if (N0.getOpcode() == ISD::TRUNCATE &&
6671       (N0.getOperand(0).getOpcode() == ISD::SRL ||
6672        N0.getOperand(0).getOpcode() == ISD::SRA) &&
6673       N0.getOperand(0).hasOneUse() &&
6674       N0.getOperand(0).getOperand(1).hasOneUse() &&
6675       N1C) {
6676     SDValue N0Op0 = N0.getOperand(0);
6677     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
6678       unsigned LargeShiftVal = LargeShift->getZExtValue();
6679       EVT LargeVT = N0Op0.getValueType();
6680
6681       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
6682         SDLoc DL(N);
6683         SDValue Amt =
6684           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
6685                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
6686         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
6687                                   N0Op0.getOperand(0), Amt);
6688         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
6689       }
6690     }
6691   }
6692
6693   // Simplify, based on bits shifted out of the LHS.
6694   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6695     return SDValue(N, 0);
6696
6697   // If the sign bit is known to be zero, switch this to a SRL.
6698   if (DAG.SignBitIsZero(N0))
6699     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6700
6701   if (N1C && !N1C->isOpaque())
6702     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6703       return NewSRA;
6704
6705   return SDValue();
6706 }
6707
6708 SDValue DAGCombiner::visitSRL(SDNode *N) {
6709   SDValue N0 = N->getOperand(0);
6710   SDValue N1 = N->getOperand(1);
6711   EVT VT = N0.getValueType();
6712   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6713
6714   // fold vector ops
6715   if (VT.isVector())
6716     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6717       return FoldedVOp;
6718
6719   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6720
6721   // fold (srl c1, c2) -> c1 >>u c2
6722   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6723   if (N0C && N1C && !N1C->isOpaque())
6724     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6725   // fold (srl 0, x) -> 0
6726   if (isNullConstantOrNullSplatConstant(N0))
6727     return N0;
6728   // fold (srl x, c >= size(x)) -> undef
6729   // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6730   auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6731     return Val->getAPIntValue().uge(OpSizeInBits);
6732   };
6733   if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6734     return DAG.getUNDEF(VT);
6735   // fold (srl x, 0) -> x
6736   if (N1C && N1C->isNullValue())
6737     return N0;
6738
6739   if (SDValue NewSel = foldBinOpIntoSelect(N))
6740     return NewSel;
6741
6742   // if (srl x, c) is known to be zero, return 0
6743   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6744                                    APInt::getAllOnesValue(OpSizeInBits)))
6745     return DAG.getConstant(0, SDLoc(N), VT);
6746
6747   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6748   if (N0.getOpcode() == ISD::SRL) {
6749     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6750                                           ConstantSDNode *RHS) {
6751       APInt c1 = LHS->getAPIntValue();
6752       APInt c2 = RHS->getAPIntValue();
6753       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6754       return (c1 + c2).uge(OpSizeInBits);
6755     };
6756     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6757       return DAG.getConstant(0, SDLoc(N), VT);
6758
6759     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6760                                        ConstantSDNode *RHS) {
6761       APInt c1 = LHS->getAPIntValue();
6762       APInt c2 = RHS->getAPIntValue();
6763       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6764       return (c1 + c2).ult(OpSizeInBits);
6765     };
6766     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6767       SDLoc DL(N);
6768       EVT ShiftVT = N1.getValueType();
6769       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6770       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6771     }
6772   }
6773
6774   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
6775   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6776       N0.getOperand(0).getOpcode() == ISD::SRL) {
6777     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6778       uint64_t c1 = N001C->getZExtValue();
6779       uint64_t c2 = N1C->getZExtValue();
6780       EVT InnerShiftVT = N0.getOperand(0).getValueType();
6781       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6782       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6783       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
6784       if (c1 + OpSizeInBits == InnerShiftSize) {
6785         SDLoc DL(N0);
6786         if (c1 + c2 >= InnerShiftSize)
6787           return DAG.getConstant(0, DL, VT);
6788         return DAG.getNode(ISD::TRUNCATE, DL, VT,
6789                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6790                                        N0.getOperand(0).getOperand(0),
6791                                        DAG.getConstant(c1 + c2, DL,
6792                                                        ShiftCountVT)));
6793       }
6794     }
6795   }
6796
6797   // fold (srl (shl x, c), c) -> (and x, cst2)
6798   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6799       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6800     SDLoc DL(N);
6801     SDValue Mask =
6802         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6803     AddToWorklist(Mask.getNode());
6804     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6805   }
6806
6807   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6808   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6809     // Shifting in all undef bits?
6810     EVT SmallVT = N0.getOperand(0).getValueType();
6811     unsigned BitSize = SmallVT.getScalarSizeInBits();
6812     if (N1C->getZExtValue() >= BitSize)
6813       return DAG.getUNDEF(VT);
6814
6815     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6816       uint64_t ShiftAmt = N1C->getZExtValue();
6817       SDLoc DL0(N0);
6818       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6819                                        N0.getOperand(0),
6820                           DAG.getConstant(ShiftAmt, DL0,
6821                                           getShiftAmountTy(SmallVT)));
6822       AddToWorklist(SmallShift.getNode());
6823       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6824       SDLoc DL(N);
6825       return DAG.getNode(ISD::AND, DL, VT,
6826                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6827                          DAG.getConstant(Mask, DL, VT));
6828     }
6829   }
6830
6831   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
6832   // bit, which is unmodified by sra.
6833   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6834     if (N0.getOpcode() == ISD::SRA)
6835       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6836   }
6837
6838   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
6839   if (N1C && N0.getOpcode() == ISD::CTLZ &&
6840       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6841     KnownBits Known;
6842     DAG.computeKnownBits(N0.getOperand(0), Known);
6843
6844     // If any of the input bits are KnownOne, then the input couldn't be all
6845     // zeros, thus the result of the srl will always be zero.
6846     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6847
6848     // If all of the bits input the to ctlz node are known to be zero, then
6849     // the result of the ctlz is "32" and the result of the shift is one.
6850     APInt UnknownBits = ~Known.Zero;
6851     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6852
6853     // Otherwise, check to see if there is exactly one bit input to the ctlz.
6854     if (UnknownBits.isPowerOf2()) {
6855       // Okay, we know that only that the single bit specified by UnknownBits
6856       // could be set on input to the CTLZ node. If this bit is set, the SRL
6857       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
6858       // to an SRL/XOR pair, which is likely to simplify more.
6859       unsigned ShAmt = UnknownBits.countTrailingZeros();
6860       SDValue Op = N0.getOperand(0);
6861
6862       if (ShAmt) {
6863         SDLoc DL(N0);
6864         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
6865                   DAG.getConstant(ShAmt, DL,
6866                                   getShiftAmountTy(Op.getValueType())));
6867         AddToWorklist(Op.getNode());
6868       }
6869
6870       SDLoc DL(N);
6871       return DAG.getNode(ISD::XOR, DL, VT,
6872                          Op, DAG.getConstant(1, DL, VT));
6873     }
6874   }
6875
6876   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
6877   if (N1.getOpcode() == ISD::TRUNCATE &&
6878       N1.getOperand(0).getOpcode() == ISD::AND) {
6879     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6880       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
6881   }
6882
6883   // fold operands of srl based on knowledge that the low bits are not
6884   // demanded.
6885   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6886     return SDValue(N, 0);
6887
6888   if (N1C && !N1C->isOpaque())
6889     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
6890       return NewSRL;
6891
6892   // Attempt to convert a srl of a load into a narrower zero-extending load.
6893   if (SDValue NarrowLoad = ReduceLoadWidth(N))
6894     return NarrowLoad;
6895
6896   // Here is a common situation. We want to optimize:
6897   //
6898   //   %a = ...
6899   //   %b = and i32 %a, 2
6900   //   %c = srl i32 %b, 1
6901   //   brcond i32 %c ...
6902   //
6903   // into
6904   //
6905   //   %a = ...
6906   //   %b = and %a, 2
6907   //   %c = setcc eq %b, 0
6908   //   brcond %c ...
6909   //
6910   // However when after the source operand of SRL is optimized into AND, the SRL
6911   // itself may not be optimized further. Look for it and add the BRCOND into
6912   // the worklist.
6913   if (N->hasOneUse()) {
6914     SDNode *Use = *N->use_begin();
6915     if (Use->getOpcode() == ISD::BRCOND)
6916       AddToWorklist(Use);
6917     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6918       // Also look pass the truncate.
6919       Use = *Use->use_begin();
6920       if (Use->getOpcode() == ISD::BRCOND)
6921         AddToWorklist(Use);
6922     }
6923   }
6924
6925   return SDValue();
6926 }
6927
6928 SDValue DAGCombiner::visitABS(SDNode *N) {
6929   SDValue N0 = N->getOperand(0);
6930   EVT VT = N->getValueType(0);
6931
6932   // fold (abs c1) -> c2
6933   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6934     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6935   // fold (abs (abs x)) -> (abs x)
6936   if (N0.getOpcode() == ISD::ABS)
6937     return N0;
6938   // fold (abs x) -> x iff not-negative
6939   if (DAG.SignBitIsZero(N0))
6940     return N0;
6941   return SDValue();
6942 }
6943
6944 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6945   SDValue N0 = N->getOperand(0);
6946   EVT VT = N->getValueType(0);
6947
6948   // fold (bswap c1) -> c2
6949   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6950     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6951   // fold (bswap (bswap x)) -> x
6952   if (N0.getOpcode() == ISD::BSWAP)
6953     return N0->getOperand(0);
6954   return SDValue();
6955 }
6956
6957 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6958   SDValue N0 = N->getOperand(0);
6959   EVT VT = N->getValueType(0);
6960
6961   // fold (bitreverse c1) -> c2
6962   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6963     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6964   // fold (bitreverse (bitreverse x)) -> x
6965   if (N0.getOpcode() == ISD::BITREVERSE)
6966     return N0.getOperand(0);
6967   return SDValue();
6968 }
6969
6970 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6971   SDValue N0 = N->getOperand(0);
6972   EVT VT = N->getValueType(0);
6973
6974   // fold (ctlz c1) -> c2
6975   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6976     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6977
6978   // If the value is known never to be zero, switch to the undef version.
6979   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6980     if (DAG.isKnownNeverZero(N0))
6981       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6982   }
6983
6984   return SDValue();
6985 }
6986
6987 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6988   SDValue N0 = N->getOperand(0);
6989   EVT VT = N->getValueType(0);
6990
6991   // fold (ctlz_zero_undef c1) -> c2
6992   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6993     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6994   return SDValue();
6995 }
6996
6997 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6998   SDValue N0 = N->getOperand(0);
6999   EVT VT = N->getValueType(0);
7000
7001   // fold (cttz c1) -> c2
7002   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7003     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7004
7005   // If the value is known never to be zero, switch to the undef version.
7006   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7007     if (DAG.isKnownNeverZero(N0))
7008       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7009   }
7010
7011   return SDValue();
7012 }
7013
7014 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7015   SDValue N0 = N->getOperand(0);
7016   EVT VT = N->getValueType(0);
7017
7018   // fold (cttz_zero_undef c1) -> c2
7019   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7020     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7021   return SDValue();
7022 }
7023
7024 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7025   SDValue N0 = N->getOperand(0);
7026   EVT VT = N->getValueType(0);
7027
7028   // fold (ctpop c1) -> c2
7029   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7030     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7031   return SDValue();
7032 }
7033
7034 /// Generate Min/Max node
7035 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7036                                    SDValue RHS, SDValue True, SDValue False,
7037                                    ISD::CondCode CC, const TargetLowering &TLI,
7038                                    SelectionDAG &DAG) {
7039   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7040     return SDValue();
7041
7042   switch (CC) {
7043   case ISD::SETOLT:
7044   case ISD::SETOLE:
7045   case ISD::SETLT:
7046   case ISD::SETLE:
7047   case ISD::SETULT:
7048   case ISD::SETULE: {
7049     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7050     if (TLI.isOperationLegal(Opcode, VT))
7051       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7052     return SDValue();
7053   }
7054   case ISD::SETOGT:
7055   case ISD::SETOGE:
7056   case ISD::SETGT:
7057   case ISD::SETGE:
7058   case ISD::SETUGT:
7059   case ISD::SETUGE: {
7060     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7061     if (TLI.isOperationLegal(Opcode, VT))
7062       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7063     return SDValue();
7064   }
7065   default:
7066     return SDValue();
7067   }
7068 }
7069
7070 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7071   SDValue Cond = N->getOperand(0);
7072   SDValue N1 = N->getOperand(1);
7073   SDValue N2 = N->getOperand(2);
7074   EVT VT = N->getValueType(0);
7075   EVT CondVT = Cond.getValueType();
7076   SDLoc DL(N);
7077
7078   if (!VT.isInteger())
7079     return SDValue();
7080
7081   auto *C1 = dyn_cast<ConstantSDNode>(N1);
7082   auto *C2 = dyn_cast<ConstantSDNode>(N2);
7083   if (!C1 || !C2)
7084     return SDValue();
7085
7086   // Only do this before legalization to avoid conflicting with target-specific
7087   // transforms in the other direction (create a select from a zext/sext). There
7088   // is also a target-independent combine here in DAGCombiner in the other
7089   // direction for (select Cond, -1, 0) when the condition is not i1.
7090   if (CondVT == MVT::i1 && !LegalOperations) {
7091     if (C1->isNullValue() && C2->isOne()) {
7092       // select Cond, 0, 1 --> zext (!Cond)
7093       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7094       if (VT != MVT::i1)
7095         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7096       return NotCond;
7097     }
7098     if (C1->isNullValue() && C2->isAllOnesValue()) {
7099       // select Cond, 0, -1 --> sext (!Cond)
7100       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7101       if (VT != MVT::i1)
7102         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7103       return NotCond;
7104     }
7105     if (C1->isOne() && C2->isNullValue()) {
7106       // select Cond, 1, 0 --> zext (Cond)
7107       if (VT != MVT::i1)
7108         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7109       return Cond;
7110     }
7111     if (C1->isAllOnesValue() && C2->isNullValue()) {
7112       // select Cond, -1, 0 --> sext (Cond)
7113       if (VT != MVT::i1)
7114         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7115       return Cond;
7116     }
7117
7118     // For any constants that differ by 1, we can transform the select into an
7119     // extend and add. Use a target hook because some targets may prefer to
7120     // transform in the other direction.
7121     if (TLI.convertSelectOfConstantsToMath(VT)) {
7122       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7123         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7124         if (VT != MVT::i1)
7125           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7126         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7127       }
7128       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7129         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7130         if (VT != MVT::i1)
7131           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7132         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7133       }
7134     }
7135
7136     return SDValue();
7137   }
7138
7139   // fold (select Cond, 0, 1) -> (xor Cond, 1)
7140   // We can't do this reliably if integer based booleans have different contents
7141   // to floating point based booleans. This is because we can't tell whether we
7142   // have an integer-based boolean or a floating-point-based boolean unless we
7143   // can find the SETCC that produced it and inspect its operands. This is
7144   // fairly easy if C is the SETCC node, but it can potentially be
7145   // undiscoverable (or not reasonably discoverable). For example, it could be
7146   // in another basic block or it could require searching a complicated
7147   // expression.
7148   if (CondVT.isInteger() &&
7149       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7150           TargetLowering::ZeroOrOneBooleanContent &&
7151       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7152           TargetLowering::ZeroOrOneBooleanContent &&
7153       C1->isNullValue() && C2->isOne()) {
7154     SDValue NotCond =
7155         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7156     if (VT.bitsEq(CondVT))
7157       return NotCond;
7158     return DAG.getZExtOrTrunc(NotCond, DL, VT);
7159   }
7160
7161   return SDValue();
7162 }
7163
7164 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7165   SDValue N0 = N->getOperand(0);
7166   SDValue N1 = N->getOperand(1);
7167   SDValue N2 = N->getOperand(2);
7168   EVT VT = N->getValueType(0);
7169   EVT VT0 = N0.getValueType();
7170   SDLoc DL(N);
7171
7172   // fold (select C, X, X) -> X
7173   if (N1 == N2)
7174     return N1;
7175
7176   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
7177     // fold (select true, X, Y) -> X
7178     // fold (select false, X, Y) -> Y
7179     return !N0C->isNullValue() ? N1 : N2;
7180   }
7181
7182   // fold (select X, X, Y) -> (or X, Y)
7183   // fold (select X, 1, Y) -> (or C, Y)
7184   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7185     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7186
7187   if (SDValue V = foldSelectOfConstants(N))
7188     return V;
7189
7190   // fold (select C, 0, X) -> (and (not C), X)
7191   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7192     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7193     AddToWorklist(NOTNode.getNode());
7194     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7195   }
7196   // fold (select C, X, 1) -> (or (not C), X)
7197   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7198     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7199     AddToWorklist(NOTNode.getNode());
7200     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7201   }
7202   // fold (select X, Y, X) -> (and X, Y)
7203   // fold (select X, Y, 0) -> (and X, Y)
7204   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7205     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7206
7207   // If we can fold this based on the true/false value, do so.
7208   if (SimplifySelectOps(N, N1, N2))
7209     return SDValue(N, 0); // Don't revisit N.
7210
7211   if (VT0 == MVT::i1) {
7212     // The code in this block deals with the following 2 equivalences:
7213     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7214     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7215     // The target can specify its preferred form with the
7216     // shouldNormalizeToSelectSequence() callback. However we always transform
7217     // to the right anyway if we find the inner select exists in the DAG anyway
7218     // and we always transform to the left side if we know that we can further
7219     // optimize the combination of the conditions.
7220     bool normalizeToSequence =
7221         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7222     // select (and Cond0, Cond1), X, Y
7223     //   -> select Cond0, (select Cond1, X, Y), Y
7224     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7225       SDValue Cond0 = N0->getOperand(0);
7226       SDValue Cond1 = N0->getOperand(1);
7227       SDValue InnerSelect =
7228           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7229       if (normalizeToSequence || !InnerSelect.use_empty())
7230         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7231                            InnerSelect, N2);
7232     }
7233     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7234     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7235       SDValue Cond0 = N0->getOperand(0);
7236       SDValue Cond1 = N0->getOperand(1);
7237       SDValue InnerSelect =
7238           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7239       if (normalizeToSequence || !InnerSelect.use_empty())
7240         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7241                            InnerSelect);
7242     }
7243
7244     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7245     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7246       SDValue N1_0 = N1->getOperand(0);
7247       SDValue N1_1 = N1->getOperand(1);
7248       SDValue N1_2 = N1->getOperand(2);
7249       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7250         // Create the actual and node if we can generate good code for it.
7251         if (!normalizeToSequence) {
7252           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7253           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7254         }
7255         // Otherwise see if we can optimize the "and" to a better pattern.
7256         if (SDValue Combined = visitANDLike(N0, N1_0, N))
7257           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7258                              N2);
7259       }
7260     }
7261     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7262     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7263       SDValue N2_0 = N2->getOperand(0);
7264       SDValue N2_1 = N2->getOperand(1);
7265       SDValue N2_2 = N2->getOperand(2);
7266       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7267         // Create the actual or node if we can generate good code for it.
7268         if (!normalizeToSequence) {
7269           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7270           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7271         }
7272         // Otherwise see if we can optimize to a better pattern.
7273         if (SDValue Combined = visitORLike(N0, N2_0, N))
7274           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7275                              N2_2);
7276       }
7277     }
7278   }
7279
7280   if (VT0 == MVT::i1) {
7281     // select (not Cond), N1, N2 -> select Cond, N2, N1
7282     if (isBitwiseNot(N0))
7283       return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7284   }
7285
7286   // fold selects based on a setcc into other things, such as min/max/abs
7287   if (N0.getOpcode() == ISD::SETCC) {
7288     // select x, y (fcmp lt x, y) -> fminnum x, y
7289     // select x, y (fcmp gt x, y) -> fmaxnum x, y
7290     //
7291     // This is OK if we don't care about what happens if either operand is a
7292     // NaN.
7293     //
7294
7295     // FIXME: This should be checking for no signed zeros on individual
7296     // operands, as well as no nans.
7297     const TargetOptions &Options = DAG.getTarget().Options;
7298     if (Options.NoSignedZerosFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
7299         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
7300       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7301
7302       if (SDValue FMinMax = combineMinNumMaxNum(
7303               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
7304         return FMinMax;
7305     }
7306
7307     if ((!LegalOperations &&
7308          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
7309         TLI.isOperationLegal(ISD::SELECT_CC, VT))
7310       return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
7311                          N0.getOperand(1), N1, N2, N0.getOperand(2));
7312     return SimplifySelect(DL, N0, N1, N2);
7313   }
7314
7315   return SDValue();
7316 }
7317
7318 static
7319 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7320   SDLoc DL(N);
7321   EVT LoVT, HiVT;
7322   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7323
7324   // Split the inputs.
7325   SDValue Lo, Hi, LL, LH, RL, RH;
7326   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7327   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7328
7329   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7330   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7331
7332   return std::make_pair(Lo, Hi);
7333 }
7334
7335 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7336 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7337 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7338   SDLoc DL(N);
7339   SDValue Cond = N->getOperand(0);
7340   SDValue LHS = N->getOperand(1);
7341   SDValue RHS = N->getOperand(2);
7342   EVT VT = N->getValueType(0);
7343   int NumElems = VT.getVectorNumElements();
7344   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7345          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7346          Cond.getOpcode() == ISD::BUILD_VECTOR);
7347
7348   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7349   // binary ones here.
7350   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7351     return SDValue();
7352
7353   // We're sure we have an even number of elements due to the
7354   // concat_vectors we have as arguments to vselect.
7355   // Skip BV elements until we find one that's not an UNDEF
7356   // After we find an UNDEF element, keep looping until we get to half the
7357   // length of the BV and see if all the non-undef nodes are the same.
7358   ConstantSDNode *BottomHalf = nullptr;
7359   for (int i = 0; i < NumElems / 2; ++i) {
7360     if (Cond->getOperand(i)->isUndef())
7361       continue;
7362
7363     if (BottomHalf == nullptr)
7364       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7365     else if (Cond->getOperand(i).getNode() != BottomHalf)
7366       return SDValue();
7367   }
7368
7369   // Do the same for the second half of the BuildVector
7370   ConstantSDNode *TopHalf = nullptr;
7371   for (int i = NumElems / 2; i < NumElems; ++i) {
7372     if (Cond->getOperand(i)->isUndef())
7373       continue;
7374
7375     if (TopHalf == nullptr)
7376       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7377     else if (Cond->getOperand(i).getNode() != TopHalf)
7378       return SDValue();
7379   }
7380
7381   assert(TopHalf && BottomHalf &&
7382          "One half of the selector was all UNDEFs and the other was all the "
7383          "same value. This should have been addressed before this function.");
7384   return DAG.getNode(
7385       ISD::CONCAT_VECTORS, DL, VT,
7386       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7387       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7388 }
7389
7390 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7391   if (Level >= AfterLegalizeTypes)
7392     return SDValue();
7393
7394   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7395   SDValue Mask = MSC->getMask();
7396   SDValue Data  = MSC->getValue();
7397   SDLoc DL(N);
7398
7399   // If the MSCATTER data type requires splitting and the mask is provided by a
7400   // SETCC, then split both nodes and its operands before legalization. This
7401   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7402   // and enables future optimizations (e.g. min/max pattern matching on X86).
7403   if (Mask.getOpcode() != ISD::SETCC)
7404     return SDValue();
7405
7406   // Check if any splitting is required.
7407   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7408       TargetLowering::TypeSplitVector)
7409     return SDValue();
7410   SDValue MaskLo, MaskHi;
7411   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7412
7413   EVT LoVT, HiVT;
7414   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7415
7416   SDValue Chain = MSC->getChain();
7417
7418   EVT MemoryVT = MSC->getMemoryVT();
7419   unsigned Alignment = MSC->getOriginalAlignment();
7420
7421   EVT LoMemVT, HiMemVT;
7422   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7423
7424   SDValue DataLo, DataHi;
7425   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7426
7427   SDValue Scale = MSC->getScale();
7428   SDValue BasePtr = MSC->getBasePtr();
7429   SDValue IndexLo, IndexHi;
7430   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7431
7432   MachineMemOperand *MMO = DAG.getMachineFunction().
7433     getMachineMemOperand(MSC->getPointerInfo(),
7434                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7435                           Alignment, MSC->getAAInfo(), MSC->getRanges());
7436
7437   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7438   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7439                                     DataLo.getValueType(), DL, OpsLo, MMO);
7440
7441   // The order of the Scatter operation after split is well defined. The "Hi"
7442   // part comes after the "Lo". So these two operations should be chained one
7443   // after another.
7444   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7445   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7446                               DL, OpsHi, MMO);
7447 }
7448
7449 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7450   if (Level >= AfterLegalizeTypes)
7451     return SDValue();
7452
7453   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7454   SDValue Mask = MST->getMask();
7455   SDValue Data  = MST->getValue();
7456   EVT VT = Data.getValueType();
7457   SDLoc DL(N);
7458
7459   // If the MSTORE data type requires splitting and the mask is provided by a
7460   // SETCC, then split both nodes and its operands before legalization. This
7461   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7462   // and enables future optimizations (e.g. min/max pattern matching on X86).
7463   if (Mask.getOpcode() == ISD::SETCC) {
7464     // Check if any splitting is required.
7465     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7466         TargetLowering::TypeSplitVector)
7467       return SDValue();
7468
7469     SDValue MaskLo, MaskHi, Lo, Hi;
7470     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7471
7472     SDValue Chain = MST->getChain();
7473     SDValue Ptr   = MST->getBasePtr();
7474
7475     EVT MemoryVT = MST->getMemoryVT();
7476     unsigned Alignment = MST->getOriginalAlignment();
7477
7478     // if Alignment is equal to the vector size,
7479     // take the half of it for the second part
7480     unsigned SecondHalfAlignment =
7481       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7482
7483     EVT LoMemVT, HiMemVT;
7484     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7485
7486     SDValue DataLo, DataHi;
7487     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7488
7489     MachineMemOperand *MMO = DAG.getMachineFunction().
7490       getMachineMemOperand(MST->getPointerInfo(),
7491                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7492                            Alignment, MST->getAAInfo(), MST->getRanges());
7493
7494     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7495                             MST->isTruncatingStore(),
7496                             MST->isCompressingStore());
7497
7498     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7499                                      MST->isCompressingStore());
7500     unsigned HiOffset = LoMemVT.getStoreSize();
7501
7502     MMO = DAG.getMachineFunction().getMachineMemOperand(
7503         MST->getPointerInfo().getWithOffset(HiOffset),
7504         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7505         MST->getAAInfo(), MST->getRanges());
7506
7507     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7508                             MST->isTruncatingStore(),
7509                             MST->isCompressingStore());
7510
7511     AddToWorklist(Lo.getNode());
7512     AddToWorklist(Hi.getNode());
7513
7514     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7515   }
7516   return SDValue();
7517 }
7518
7519 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7520   if (Level >= AfterLegalizeTypes)
7521     return SDValue();
7522
7523   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7524   SDValue Mask = MGT->getMask();
7525   SDLoc DL(N);
7526
7527   // If the MGATHER result requires splitting and the mask is provided by a
7528   // SETCC, then split both nodes and its operands before legalization. This
7529   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7530   // and enables future optimizations (e.g. min/max pattern matching on X86).
7531
7532   if (Mask.getOpcode() != ISD::SETCC)
7533     return SDValue();
7534
7535   EVT VT = N->getValueType(0);
7536
7537   // Check if any splitting is required.
7538   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7539       TargetLowering::TypeSplitVector)
7540     return SDValue();
7541
7542   SDValue MaskLo, MaskHi, Lo, Hi;
7543   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7544
7545   SDValue PassThru = MGT->getPassThru();
7546   SDValue PassThruLo, PassThruHi;
7547   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7548
7549   EVT LoVT, HiVT;
7550   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7551
7552   SDValue Chain = MGT->getChain();
7553   EVT MemoryVT = MGT->getMemoryVT();
7554   unsigned Alignment = MGT->getOriginalAlignment();
7555
7556   EVT LoMemVT, HiMemVT;
7557   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7558
7559   SDValue Scale = MGT->getScale();
7560   SDValue BasePtr = MGT->getBasePtr();
7561   SDValue Index = MGT->getIndex();
7562   SDValue IndexLo, IndexHi;
7563   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7564
7565   MachineMemOperand *MMO = DAG.getMachineFunction().
7566     getMachineMemOperand(MGT->getPointerInfo(),
7567                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7568                           Alignment, MGT->getAAInfo(), MGT->getRanges());
7569
7570   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7571   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7572                            MMO);
7573
7574   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7575   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7576                            MMO);
7577
7578   AddToWorklist(Lo.getNode());
7579   AddToWorklist(Hi.getNode());
7580
7581   // Build a factor node to remember that this load is independent of the
7582   // other one.
7583   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7584                       Hi.getValue(1));
7585
7586   // Legalized the chain result - switch anything that used the old chain to
7587   // use the new one.
7588   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7589
7590   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7591
7592   SDValue RetOps[] = { GatherRes, Chain };
7593   return DAG.getMergeValues(RetOps, DL);
7594 }
7595
7596 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7597   if (Level >= AfterLegalizeTypes)
7598     return SDValue();
7599
7600   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7601   SDValue Mask = MLD->getMask();
7602   SDLoc DL(N);
7603
7604   // If the MLOAD result requires splitting and the mask is provided by a
7605   // SETCC, then split both nodes and its operands before legalization. This
7606   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7607   // and enables future optimizations (e.g. min/max pattern matching on X86).
7608   if (Mask.getOpcode() == ISD::SETCC) {
7609     EVT VT = N->getValueType(0);
7610
7611     // Check if any splitting is required.
7612     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7613         TargetLowering::TypeSplitVector)
7614       return SDValue();
7615
7616     SDValue MaskLo, MaskHi, Lo, Hi;
7617     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7618
7619     SDValue PassThru = MLD->getPassThru();
7620     SDValue PassThruLo, PassThruHi;
7621     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7622
7623     EVT LoVT, HiVT;
7624     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7625
7626     SDValue Chain = MLD->getChain();
7627     SDValue Ptr   = MLD->getBasePtr();
7628     EVT MemoryVT = MLD->getMemoryVT();
7629     unsigned Alignment = MLD->getOriginalAlignment();
7630
7631     // if Alignment is equal to the vector size,
7632     // take the half of it for the second part
7633     unsigned SecondHalfAlignment =
7634       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7635          Alignment/2 : Alignment;
7636
7637     EVT LoMemVT, HiMemVT;
7638     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7639
7640     MachineMemOperand *MMO = DAG.getMachineFunction().
7641     getMachineMemOperand(MLD->getPointerInfo(),
7642                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7643                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7644
7645     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7646                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7647
7648     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7649                                      MLD->isExpandingLoad());
7650     unsigned HiOffset = LoMemVT.getStoreSize();
7651
7652     MMO = DAG.getMachineFunction().getMachineMemOperand(
7653         MLD->getPointerInfo().getWithOffset(HiOffset),
7654         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7655         MLD->getAAInfo(), MLD->getRanges());
7656
7657     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7658                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7659
7660     AddToWorklist(Lo.getNode());
7661     AddToWorklist(Hi.getNode());
7662
7663     // Build a factor node to remember that this load is independent of the
7664     // other one.
7665     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7666                         Hi.getValue(1));
7667
7668     // Legalized the chain result - switch anything that used the old chain to
7669     // use the new one.
7670     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7671
7672     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7673
7674     SDValue RetOps[] = { LoadRes, Chain };
7675     return DAG.getMergeValues(RetOps, DL);
7676   }
7677   return SDValue();
7678 }
7679
7680 /// A vector select of 2 constant vectors can be simplified to math/logic to
7681 /// avoid a variable select instruction and possibly avoid constant loads.
7682 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7683   SDValue Cond = N->getOperand(0);
7684   SDValue N1 = N->getOperand(1);
7685   SDValue N2 = N->getOperand(2);
7686   EVT VT = N->getValueType(0);
7687   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7688       !TLI.convertSelectOfConstantsToMath(VT) ||
7689       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7690       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7691     return SDValue();
7692
7693   // Check if we can use the condition value to increment/decrement a single
7694   // constant value. This simplifies a select to an add and removes a constant
7695   // load/materialization from the general case.
7696   bool AllAddOne = true;
7697   bool AllSubOne = true;
7698   unsigned Elts = VT.getVectorNumElements();
7699   for (unsigned i = 0; i != Elts; ++i) {
7700     SDValue N1Elt = N1.getOperand(i);
7701     SDValue N2Elt = N2.getOperand(i);
7702     if (N1Elt.isUndef() || N2Elt.isUndef())
7703       continue;
7704
7705     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7706     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7707     if (C1 != C2 + 1)
7708       AllAddOne = false;
7709     if (C1 != C2 - 1)
7710       AllSubOne = false;
7711   }
7712
7713   // Further simplifications for the extra-special cases where the constants are
7714   // all 0 or all -1 should be implemented as folds of these patterns.
7715   SDLoc DL(N);
7716   if (AllAddOne || AllSubOne) {
7717     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7718     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7719     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7720     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7721     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7722   }
7723
7724   // The general case for select-of-constants:
7725   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7726   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7727   // leave that to a machine-specific pass.
7728   return SDValue();
7729 }
7730
7731 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7732   SDValue N0 = N->getOperand(0);
7733   SDValue N1 = N->getOperand(1);
7734   SDValue N2 = N->getOperand(2);
7735   SDLoc DL(N);
7736
7737   // fold (vselect C, X, X) -> X
7738   if (N1 == N2)
7739     return N1;
7740
7741   // Canonicalize integer abs.
7742   // vselect (setg[te] X,  0),  X, -X ->
7743   // vselect (setgt    X, -1),  X, -X ->
7744   // vselect (setl[te] X,  0), -X,  X ->
7745   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7746   if (N0.getOpcode() == ISD::SETCC) {
7747     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7748     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7749     bool isAbs = false;
7750     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7751
7752     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7753          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7754         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7755       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7756     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7757              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7758       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7759
7760     if (isAbs) {
7761       EVT VT = LHS.getValueType();
7762       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7763         return DAG.getNode(ISD::ABS, DL, VT, LHS);
7764
7765       SDValue Shift = DAG.getNode(
7766           ISD::SRA, DL, VT, LHS,
7767           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7768       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7769       AddToWorklist(Shift.getNode());
7770       AddToWorklist(Add.getNode());
7771       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7772     }
7773
7774     // If this select has a condition (setcc) with narrower operands than the
7775     // select, try to widen the compare to match the select width.
7776     // TODO: This should be extended to handle any constant.
7777     // TODO: This could be extended to handle non-loading patterns, but that
7778     //       requires thorough testing to avoid regressions.
7779     if (isNullConstantOrNullSplatConstant(RHS)) {
7780       EVT NarrowVT = LHS.getValueType();
7781       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
7782       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
7783       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
7784       unsigned WideWidth = WideVT.getScalarSizeInBits();
7785       bool IsSigned = isSignedIntSetCC(CC);
7786       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7787       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
7788           SetCCWidth != 1 && SetCCWidth < WideWidth &&
7789           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
7790           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
7791         // Both compare operands can be widened for free. The LHS can use an
7792         // extended load, and the RHS is a constant:
7793         //   vselect (ext (setcc load(X), C)), N1, N2 -->
7794         //   vselect (setcc extload(X), C'), N1, N2
7795         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7796         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
7797         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
7798         EVT WideSetCCVT = getSetCCResultType(WideVT);
7799         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
7800         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
7801       }
7802     }
7803   }
7804
7805   if (SimplifySelectOps(N, N1, N2))
7806     return SDValue(N, 0);  // Don't revisit N.
7807
7808   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
7809   if (ISD::isBuildVectorAllOnes(N0.getNode()))
7810     return N1;
7811   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
7812   if (ISD::isBuildVectorAllZeros(N0.getNode()))
7813     return N2;
7814
7815   // The ConvertSelectToConcatVector function is assuming both the above
7816   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
7817   // and addressed.
7818   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
7819       N2.getOpcode() == ISD::CONCAT_VECTORS &&
7820       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
7821     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
7822       return CV;
7823   }
7824
7825   if (SDValue V = foldVSelectOfConstants(N))
7826     return V;
7827
7828   return SDValue();
7829 }
7830
7831 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7832   SDValue N0 = N->getOperand(0);
7833   SDValue N1 = N->getOperand(1);
7834   SDValue N2 = N->getOperand(2);
7835   SDValue N3 = N->getOperand(3);
7836   SDValue N4 = N->getOperand(4);
7837   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7838
7839   // fold select_cc lhs, rhs, x, x, cc -> x
7840   if (N2 == N3)
7841     return N2;
7842
7843   // Determine if the condition we're dealing with is constant
7844   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7845                                   CC, SDLoc(N), false)) {
7846     AddToWorklist(SCC.getNode());
7847
7848     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7849       if (!SCCC->isNullValue())
7850         return N2;    // cond always true -> true val
7851       else
7852         return N3;    // cond always false -> false val
7853     } else if (SCC->isUndef()) {
7854       // When the condition is UNDEF, just return the first operand. This is
7855       // coherent the DAG creation, no setcc node is created in this case
7856       return N2;
7857     } else if (SCC.getOpcode() == ISD::SETCC) {
7858       // Fold to a simpler select_cc
7859       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7860                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7861                          SCC.getOperand(2));
7862     }
7863   }
7864
7865   // If we can fold this based on the true/false value, do so.
7866   if (SimplifySelectOps(N, N2, N3))
7867     return SDValue(N, 0);  // Don't revisit N.
7868
7869   // fold select_cc into other things, such as min/max/abs
7870   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7871 }
7872
7873 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7874   // setcc is very commonly used as an argument to brcond. This pattern
7875   // also lend itself to numerous combines and, as a result, it is desired
7876   // we keep the argument to a brcond as a setcc as much as possible.
7877   bool PreferSetCC =
7878       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7879
7880   SDValue Combined = SimplifySetCC(
7881       N->getValueType(0), N->getOperand(0), N->getOperand(1),
7882       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7883
7884   if (!Combined)
7885     return SDValue();
7886
7887   // If we prefer to have a setcc, and we don't, we'll try our best to
7888   // recreate one using rebuildSetCC.
7889   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7890     SDValue NewSetCC = rebuildSetCC(Combined);
7891
7892     // We don't have anything interesting to combine to.
7893     if (NewSetCC.getNode() == N)
7894       return SDValue();
7895
7896     if (NewSetCC)
7897       return NewSetCC;
7898   }
7899
7900   return Combined;
7901 }
7902
7903 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7904   SDValue LHS = N->getOperand(0);
7905   SDValue RHS = N->getOperand(1);
7906   SDValue Carry = N->getOperand(2);
7907   SDValue Cond = N->getOperand(3);
7908
7909   // If Carry is false, fold to a regular SETCC.
7910   if (isNullConstant(Carry))
7911     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7912
7913   return SDValue();
7914 }
7915
7916 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
7917 /// a build_vector of constants.
7918 /// This function is called by the DAGCombiner when visiting sext/zext/aext
7919 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
7920 /// Vector extends are not folded if operations are legal; this is to
7921 /// avoid introducing illegal build_vector dag nodes.
7922 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
7923                                          SelectionDAG &DAG, bool LegalTypes,
7924                                          bool LegalOperations) {
7925   unsigned Opcode = N->getOpcode();
7926   SDValue N0 = N->getOperand(0);
7927   EVT VT = N->getValueType(0);
7928
7929   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
7930          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7931          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
7932          && "Expected EXTEND dag node in input!");
7933
7934   // fold (sext c1) -> c1
7935   // fold (zext c1) -> c1
7936   // fold (aext c1) -> c1
7937   if (isa<ConstantSDNode>(N0))
7938     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
7939
7940   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
7941   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
7942   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
7943   EVT SVT = VT.getScalarType();
7944   if (!(VT.isVector() &&
7945       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
7946       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
7947     return nullptr;
7948
7949   // We can fold this node into a build_vector.
7950   unsigned VTBits = SVT.getSizeInBits();
7951   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
7952   SmallVector<SDValue, 8> Elts;
7953   unsigned NumElts = VT.getVectorNumElements();
7954   SDLoc DL(N);
7955
7956   for (unsigned i=0; i != NumElts; ++i) {
7957     SDValue Op = N0->getOperand(i);
7958     if (Op->isUndef()) {
7959       Elts.push_back(DAG.getUNDEF(SVT));
7960       continue;
7961     }
7962
7963     SDLoc DL(Op);
7964     // Get the constant value and if needed trunc it to the size of the type.
7965     // Nodes like build_vector might have constants wider than the scalar type.
7966     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
7967     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
7968       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
7969     else
7970       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
7971   }
7972
7973   return DAG.getBuildVector(VT, DL, Elts).getNode();
7974 }
7975
7976 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7977 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7978 // transformation. Returns true if extension are possible and the above
7979 // mentioned transformation is profitable.
7980 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7981                                     unsigned ExtOpc,
7982                                     SmallVectorImpl<SDNode *> &ExtendNodes,
7983                                     const TargetLowering &TLI) {
7984   bool HasCopyToRegUses = false;
7985   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7986   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7987                             UE = N0.getNode()->use_end();
7988        UI != UE; ++UI) {
7989     SDNode *User = *UI;
7990     if (User == N)
7991       continue;
7992     if (UI.getUse().getResNo() != N0.getResNo())
7993       continue;
7994     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7995     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7996       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7997       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7998         // Sign bits will be lost after a zext.
7999         return false;
8000       bool Add = false;
8001       for (unsigned i = 0; i != 2; ++i) {
8002         SDValue UseOp = User->getOperand(i);
8003         if (UseOp == N0)
8004           continue;
8005         if (!isa<ConstantSDNode>(UseOp))
8006           return false;
8007         Add = true;
8008       }
8009       if (Add)
8010         ExtendNodes.push_back(User);
8011       continue;
8012     }
8013     // If truncates aren't free and there are users we can't
8014     // extend, it isn't worthwhile.
8015     if (!isTruncFree)
8016       return false;
8017     // Remember if this value is live-out.
8018     if (User->getOpcode() == ISD::CopyToReg)
8019       HasCopyToRegUses = true;
8020   }
8021
8022   if (HasCopyToRegUses) {
8023     bool BothLiveOut = false;
8024     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8025          UI != UE; ++UI) {
8026       SDUse &Use = UI.getUse();
8027       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8028         BothLiveOut = true;
8029         break;
8030       }
8031     }
8032     if (BothLiveOut)
8033       // Both unextended and extended values are live out. There had better be
8034       // a good reason for the transformation.
8035       return ExtendNodes.size();
8036   }
8037   return true;
8038 }
8039
8040 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8041                                   SDValue OrigLoad, SDValue ExtLoad,
8042                                   ISD::NodeType ExtType) {
8043   // Extend SetCC uses if necessary.
8044   SDLoc DL(ExtLoad);
8045   for (SDNode *SetCC : SetCCs) {
8046     SmallVector<SDValue, 4> Ops;
8047
8048     for (unsigned j = 0; j != 2; ++j) {
8049       SDValue SOp = SetCC->getOperand(j);
8050       if (SOp == OrigLoad)
8051         Ops.push_back(ExtLoad);
8052       else
8053         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8054     }
8055
8056     Ops.push_back(SetCC->getOperand(2));
8057     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8058   }
8059 }
8060
8061 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8062 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8063   SDValue N0 = N->getOperand(0);
8064   EVT DstVT = N->getValueType(0);
8065   EVT SrcVT = N0.getValueType();
8066
8067   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8068           N->getOpcode() == ISD::ZERO_EXTEND) &&
8069          "Unexpected node type (not an extend)!");
8070
8071   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8072   // For example, on a target with legal v4i32, but illegal v8i32, turn:
8073   //   (v8i32 (sext (v8i16 (load x))))
8074   // into:
8075   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
8076   //                          (v4i32 (sextload (x + 16)))))
8077   // Where uses of the original load, i.e.:
8078   //   (v8i16 (load x))
8079   // are replaced with:
8080   //   (v8i16 (truncate
8081   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
8082   //                            (v4i32 (sextload (x + 16)))))))
8083   //
8084   // This combine is only applicable to illegal, but splittable, vectors.
8085   // All legal types, and illegal non-vector types, are handled elsewhere.
8086   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8087   //
8088   if (N0->getOpcode() != ISD::LOAD)
8089     return SDValue();
8090
8091   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8092
8093   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8094       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8095       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8096     return SDValue();
8097
8098   SmallVector<SDNode *, 4> SetCCs;
8099   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8100     return SDValue();
8101
8102   ISD::LoadExtType ExtType =
8103       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8104
8105   // Try to split the vector types to get down to legal types.
8106   EVT SplitSrcVT = SrcVT;
8107   EVT SplitDstVT = DstVT;
8108   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8109          SplitSrcVT.getVectorNumElements() > 1) {
8110     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8111     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8112   }
8113
8114   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8115     return SDValue();
8116
8117   SDLoc DL(N);
8118   const unsigned NumSplits =
8119       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8120   const unsigned Stride = SplitSrcVT.getStoreSize();
8121   SmallVector<SDValue, 4> Loads;
8122   SmallVector<SDValue, 4> Chains;
8123
8124   SDValue BasePtr = LN0->getBasePtr();
8125   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8126     const unsigned Offset = Idx * Stride;
8127     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8128
8129     SDValue SplitLoad = DAG.getExtLoad(
8130         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8131         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8132         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8133
8134     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8135                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8136
8137     Loads.push_back(SplitLoad.getValue(0));
8138     Chains.push_back(SplitLoad.getValue(1));
8139   }
8140
8141   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8142   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8143
8144   // Simplify TF.
8145   AddToWorklist(NewChain.getNode());
8146
8147   CombineTo(N, NewValue);
8148
8149   // Replace uses of the original load (before extension)
8150   // with a truncate of the concatenated sextloaded vectors.
8151   SDValue Trunc =
8152       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8153   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8154   CombineTo(N0.getNode(), Trunc, NewChain);
8155   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8156 }
8157
8158 // Fold a zero-extend of a masked, shifted load into a zero-extending load:
8159 //   (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8160 //      (and/or/xor (shl/shr (zextload x), cst), (zext cst))
8161 // Returns N once the DAG has been rewritten, or an empty SDValue otherwise.
8162 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8163   assert(N->getOpcode() == ISD::ZERO_EXTEND);
8164   EVT VT = N->getValueType(0);
8165
8166   // The extended value must be and/or/xor with a constant right-hand side.
8167   SDValue LogicOp = N->getOperand(0);
8168   unsigned LogicOpc = LogicOp.getOpcode();
8169   bool IsLogicOp =
8170       LogicOpc == ISD::AND || LogicOpc == ISD::OR || LogicOpc == ISD::XOR;
8171   if (!IsLogicOp || LogicOp.getOperand(1).getOpcode() != ISD::Constant)
8172     return SDValue();
8173   if (LegalOperations && !TLI.isOperationLegal(LogicOpc, VT))
8174     return SDValue();
8175
8176   // Its left-hand side must be shl/srl by a constant amount.
8177   SDValue ShiftOp = LogicOp.getOperand(0);
8178   unsigned ShiftOpc = ShiftOp.getOpcode();
8179   if ((ShiftOpc != ISD::SHL && ShiftOpc != ISD::SRL) ||
8180       ShiftOp.getOperand(1).getOpcode() != ISD::Constant ||
8181       (LegalOperations && !TLI.isOperationLegal(ShiftOpc, VT)))
8182     return SDValue();
8183
8184   // The shifted value must be a load that can legally become a zextload.
8185   auto *Load = dyn_cast<LoadSDNode>(ShiftOp.getOperand(0));
8186   if (!Load || !TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, Load->getMemoryVT()))
8187     return SDValue();
8188   if (Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8189     return SDValue();
8190
8191   // A wide SHL keeps bits the narrow one dropped; only AND clears them again.
8192   if (ShiftOpc == ISD::SHL && LogicOpc != ISD::AND)
8193     return SDValue();
8194
8195   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
8196     return SDValue();
8197
8198   SmallVector<SDNode*, 4> SetCCs;
8199   if (!ExtendUsesToFormExtLoad(VT, ShiftOp.getNode(), ShiftOp.getOperand(0),
8200                                ISD::ZERO_EXTEND, SetCCs, TLI))
8201     return SDValue();
8202
8203   // Build the replacement in VT: zextload, then the shift, then the logic op.
8204   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8205                                    Load->getChain(), Load->getBasePtr(),
8206                                    Load->getMemoryVT(), Load->getMemOperand());
8207   SDLoc ShiftDL(ShiftOp);
8208   SDValue WideShift =
8209       DAG.getNode(ShiftOpc, ShiftDL, VT, ExtLoad, ShiftOp.getOperand(1));
8210   SDLoc LogicDL(LogicOp);
8211   APInt Mask = cast<ConstantSDNode>(LogicOp.getOperand(1))->getAPIntValue();
8212   Mask = Mask.zext(VT.getSizeInBits());
8213   SDValue WideLogic = DAG.getNode(LogicOpc, LogicDL, VT, WideShift,
8214                                   DAG.getConstant(Mask, LogicDL, VT));
8215
8216   // Install the new nodes; other users of the narrow load get a truncate.
8217   ExtendSetCCUses(SetCCs, ShiftOp.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8218   CombineTo(N, WideLogic);
8219   if (!SDValue(Load, 0).hasOneUse()) {
8220     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8221                                 Load->getValueType(0), ExtLoad);
8222     CombineTo(Load, Trunc, ExtLoad.getValue(1));
8223   } else {
8224     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8225   }
8226   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8227 }
8228
8229 /// When a vector select is narrowed or widened to the size of the setcc that
8230 /// feeds its condition, cast the select's operands instead; matching sizes
8231 /// for the condition and the selected values should be more efficient:
8232 ///   cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8233 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8234   unsigned CastOpcode = Cast->getOpcode();
8235   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8236           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8237           CastOpcode == ISD::FP_ROUND) &&
8238          "Unexpected opcode for vector select narrowing/widening");
8239
8240   // Only run before operation legalization, since target-specific nodes may
8241   // hide the pattern afterwards, and never create a select the target cannot
8242   // handle, because that may be difficult to lower.
8243   if (LegalOperations)
8244     return SDValue();
8245   EVT VT = Cast->getValueType(0);
8246   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8247     return SDValue();
8248
8249   // The cast must be the only user of a vselect whose condition is a setcc.
8250   SDValue VSel = Cast->getOperand(0);
8251   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse())
8252     return SDValue();
8253   SDValue SetCC = VSel.getOperand(0);
8254   if (SetCC.getOpcode() != ISD::SETCC)
8255     return SDValue();
8256
8257   // The setcc result type must be exactly as wide as the casted select.
8258   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8259   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8260     return SDValue();
8261
8262   SDLoc DL(Cast);
8263   // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8264   auto CastArm = [&](SDValue Arm) {
8265     if (CastOpcode == ISD::FP_ROUND)
8266       return DAG.getNode(CastOpcode, DL, VT, Arm, Cast->getOperand(1));
8267     return DAG.getNode(CastOpcode, DL, VT, Arm);
8268   };
8269   SDValue CastA = CastArm(VSel.getOperand(1));
8270   SDValue CastB = CastArm(VSel.getOperand(2));
8271   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8272 }
8273
8274 // Fold an extension of a single-use extending load into one wider load:
8275 //   ([s|z]ext ([s|z]extload x)) and ([s|z]ext (extload x)) -> [s|z]extload x
8276 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8277                                      const TargetLowering &TLI, EVT VT,
8278                                      bool LegalOperations, SDNode *N,
8279                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8280   SDNode *Src = N0.getNode();
8281   bool SameKindExt = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(Src)
8282                                                     : ISD::isZEXTLoad(Src);
8283   if (!SameKindExt && !ISD::isEXTLoad(Src))
8284     return {};
8285   if (!ISD::isUNINDEXEDLoad(Src) || !N0.hasOneUse())
8286     return {};
8287   auto *OldLoad = cast<LoadSDNode>(N0);
8288   EVT MemVT = OldLoad->getMemoryVT();
8289   bool NeedLegal = LegalOperations || OldLoad->isVolatile();
8290   if (NeedLegal && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8291     return {};
8292
8293   SDValue WideLoad =
8294       DAG.getExtLoad(ExtLoadType, SDLoc(OldLoad), VT, OldLoad->getChain(),
8295                      OldLoad->getBasePtr(), MemVT, OldLoad->getMemOperand());
8296   Combiner.CombineTo(N, WideLoad);
8297   DAG.ReplaceAllUsesOfValueWith(SDValue(OldLoad, 1), WideLoad.getValue(1));
8298   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8299 }
8300
8301 // fold ([s|z]ext (load x)) -> ([s|z]extload x), handing any other users of
8302 // the load (truncate ([s|z]extload x)).  Vector extloads are only formed
8303 // when they are legal and the target deems them desirable.
8304 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8305                                   const TargetLowering &TLI, EVT VT,
8306                                   bool LegalOperations, SDNode *N, SDValue N0,
8307                                   ISD::LoadExtType ExtLoadType,
8308                                   ISD::NodeType ExtOpc) {
8309   SDNode *Src = N0.getNode();
8310   if (!ISD::isNON_EXTLoad(Src) || !ISD::isUNINDEXEDLoad(Src))
8311     return {};
8312   auto *Load = cast<LoadSDNode>(N0);
8313   EVT SrcVT = N0.getValueType();
8314   bool NeedLegal = LegalOperations || VT.isVector() || Load->isVolatile();
8315   if (NeedLegal && !TLI.isLoadExtLegal(ExtLoadType, VT, SrcVT))
8316     return {};
8317
8318   SmallVector<SDNode *, 4> SetCCs;
8319   bool CanFold = true;
8320   if (!N0.hasOneUse())
8321     CanFold = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8322   if (VT.isVector())
8323     CanFold &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8324   if (!CanFold)
8325     return {};
8326
8327   SDValue ExtLoad =
8328       DAG.getExtLoad(ExtLoadType, SDLoc(Load), VT, Load->getChain(),
8329                      Load->getBasePtr(), SrcVT, Load->getMemOperand());
8330   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8331   // Record, before N is replaced, whether N is the load value's only user.
8332   bool OnlyUsedByN = SDValue(Load, 0).hasOneUse();
8333   Combiner.CombineTo(N, ExtLoad);
8334   if (!OnlyUsedByN) {
8335     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), SrcVT, ExtLoad);
8336     Combiner.CombineTo(Load, Trunc, ExtLoad.getValue(1));
8337   } else {
8338     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8339   }
8340   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8341 }
8342
8343 // Turn an extended sign-bit test into a shift of the inverted operand:
8344 //   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8345 //   zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8346 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8347                                        bool LegalOperations) {
8348   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8349           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8350
8351   SDValue SetCC = N->getOperand(0);
8352   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC)
8353     return SDValue();
8354   if (!SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8355     return SDValue();
8356
8357   // setge X, C is canonicalized to setgt, so that form need not be matched.
8358   // The setlt sibling is folded in SimplifySelectCC(); it needs no 'not' op.
8359   SDValue X = SetCC.getOperand(0);
8360   EVT VT = N->getValueType(0);
8361   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8362   if (CC != ISD::SETGT || !isAllOnesConstant(SetCC.getOperand(1)) ||
8363       VT != X.getValueType())
8364     return SDValue();
8365
8366   // Invert X, then smear (sext) or shift down (zext) what is now the sign bit.
8367   SDLoc DL(N);
8368   SDValue NotX = DAG.getNOT(DL, X, VT);
8369   SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8370   bool IsSExt = N->getOpcode() == ISD::SIGN_EXTEND;
8371   return DAG.getNode(IsSExt ? ISD::SRA : ISD::SRL, DL, VT, NotX, ShiftAmount);
8372 }
8373
     // Combine a SIGN_EXTEND node.  The folds below are tried in order and the
     // first one that applies decides the result.  Returns the replacement value,
     // SDValue(N, 0) when the DAG was already updated through CombineTo, or an
     // empty SDValue when no fold applies.
8374 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8375   SDValue N0 = N->getOperand(0);
8376   EVT VT = N->getValueType(0);
8377   SDLoc DL(N);
8378
       // A non-null result from tryToFoldExtendOfConstant replaces N outright.
8379   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8380                                               LegalOperations))
8381     return SDValue(Res, 0);
8382
8383   // fold (sext (sext x)) -> (sext x)
8384   // fold (sext (aext x)) -> (sext x)
8385   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8386     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8387
8388   if (N0.getOpcode() == ISD::TRUNCATE) {
8389     // fold (sext (truncate (load x))) -> (sext (smaller load x))
8390     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8391     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Keep the truncate's source node so it can be requeued after CombineTo.
8392       SDNode *oye = N0.getOperand(0).getNode();
8393       if (NarrowLoad.getNode() != N0.getNode()) {
8394         CombineTo(N0.getNode(), NarrowLoad);
8395         // CombineTo deleted the truncate, if needed, but not what's under it.
8396         AddToWorklist(oye);
8397       }
8398       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8399     }
8400
8401     // See if the value being truncated is already sign extended.  If so, just
8402     // eliminate the trunc/sext pair.
8403     SDValue Op = N0.getOperand(0);
8404     unsigned OpBits   = Op.getScalarValueSizeInBits();
8405     unsigned MidBits  = N0.getScalarValueSizeInBits();
8406     unsigned DestBits = VT.getScalarSizeInBits();
8407     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8408
8409     if (OpBits == DestBits) {
8410       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
8411       // bits, it is already ready.
8412       if (NumSignBits > DestBits-MidBits)
8413         return Op;
8414     } else if (OpBits < DestBits) {
8415       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
8416       // bits, just sext from i32.
8417       if (NumSignBits > OpBits-MidBits)
8418         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8419     } else {
8420       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
8421       // bits, just truncate to i32.
8422       if (NumSignBits > OpBits-MidBits)
8423         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8424     }
8425
8426     // fold (sext (truncate x)) -> (sextinreg x).
8427     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8428                                                  N0.getValueType())) {
           // Bring Op to the destination width first; the sextinreg then
           // re-extends from the truncated type's width.
8429       if (OpBits < DestBits)
8430         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8431       else if (OpBits > DestBits)
8432         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8433       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8434                          DAG.getValueType(N0.getValueType()));
8435     }
8436   }
8437
8438   // Try to simplify (sext (load x)).
8439   if (SDValue foldedExt =
8440           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8441                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8442     return foldedExt;
8443
8444   // fold (sext (load x)) to multiple smaller sextloads.
8445   // Only on illegal but splittable vectors.
8446   if (SDValue ExtLoad = CombineExtLoad(N))
8447     return ExtLoad;
8448
8449   // Try to simplify (sext (sextload x)).
8450   if (SDValue foldedExt = tryToFoldExtOfExtload(
8451           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8452     return foldedExt;
8453
8454   // fold (sext (and/or/xor (load x), cst)) ->
8455   //      (and/or/xor (sextload x), (sext cst))
       // This fold only runs before operation legalization, and only when the
       // logic op is legal in the wide type VT.
8456   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8457        N0.getOpcode() == ISD::XOR) &&
8458       isa<LoadSDNode>(N0.getOperand(0)) &&
8459       N0.getOperand(1).getOpcode() == ISD::Constant &&
8460       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8461     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8462     EVT MemVT = LN00->getMemoryVT();
8463     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8464       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8465       SmallVector<SDNode*, 4> SetCCs;
8466       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8467                                              ISD::SIGN_EXTEND, SetCCs, TLI);
8468       if (DoXform) {
8469         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8470                                          LN00->getChain(), LN00->getBasePtr(),
8471                                          LN00->getMemoryVT(),
8472                                          LN00->getMemOperand());
8473         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8474         Mask = Mask.sext(VT.getSizeInBits());
8475         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8476                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8477         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
             // Both use counts are sampled before CombineTo(N, And) runs.
             // Despite its name, NoReplaceTruncAnd is true when N0 has users
             // other than N; those users receive a truncate of the wide op.
8478         bool NoReplaceTruncAnd = !N0.hasOneUse();
             // True when the load value has a single user, in which case only
             // the chain result of the old load needs to be redirected.
8479         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8480         CombineTo(N, And);
8481         // If N0 has multiple uses, change other uses as well.
8482         if (NoReplaceTruncAnd) {
8483           SDValue TruncAnd =
8484               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8485           CombineTo(N0.getNode(), TruncAnd);
8486         }
8487         if (NoReplaceTrunc) {
8488           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8489         } else {
8490           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8491                                       LN00->getValueType(0), ExtLoad);
8492           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8493         }
8494         return SDValue(N,0); // Return N so it doesn't get rechecked!
8495       }
8496     }
8497   }
8498
8499   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8500     return V;
8501
8502   if (N0.getOpcode() == ISD::SETCC) {
8503     SDValue N00 = N0.getOperand(0);
8504     SDValue N01 = N0.getOperand(1);
8505     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8506     EVT N00VT = N0.getOperand(0).getValueType();
8507
8508     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8509     // Only do this before legalize for now.
8510     if (VT.isVector() && !LegalOperations &&
8511         TLI.getBooleanContents(N00VT) ==
8512             TargetLowering::ZeroOrNegativeOneBooleanContent) {
8513       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8514       // of the same size as the compared operands. Only optimize sext(setcc())
8515       // if this is the case.
8516       EVT SVT = getSetCCResultType(N00VT);
8517
8518       // We know that the # elements of the results is the same as the
8519       // # elements of the compare (and the # elements of the compare result
8520       // for that matter).  Check to see that they are the same size.  If so,
8521       // we know that the element size of the sext'd result matches the
8522       // element size of the compare operands.
8523       if (VT.getSizeInBits() == SVT.getSizeInBits())
8524         return DAG.getSetCC(DL, VT, N00, N01, CC);
8525
8526       // If the desired elements are smaller or larger than the source
8527       // elements, we can use a matching integer vector type and then
8528       // truncate/sign extend.
8529       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8530       if (SVT == MatchingVecType) {
8531         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8532         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8533       }
8534     }
8535
8536     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8537     // Here, T can be 1 or -1, depending on the type of the setcc and
8538     // getBooleanContents().
8539     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8540
8541     // To determine the "true" side of the select, we need to know the high bit
8542     // of the value returned by the setcc if it evaluates to true.
8543     // If the type of the setcc is i1, then the true case of the select is just
8544     // sext(i1 1), that is, -1.
8545     // If the type of the setcc is larger (say, i8) then the value of the high
8546     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8547     // of the appropriate width.
8548     SDValue ExtTrueVal = (SetCCWidth == 1)
8549                              ? DAG.getAllOnesConstant(DL, VT)
8550                              : DAG.getBoolConstant(true, DL, VT, N00VT);
8551     SDValue Zero = DAG.getConstant(0, DL, VT);
8552     if (SDValue SCC =
8553             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8554       return SCC;
8555
8556     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8557       EVT SetCCVT = getSetCCResultType(N00VT);
8558       // Don't do this transform for i1 because there's a select transform
8559       // that would reverse it.
8560       // TODO: We should not do this transform at all without a target hook
8561       // because a sext is likely cheaper than a select?
8562       if (SetCCVT.getScalarSizeInBits() != 1 &&
8563           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8564         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8565         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8566       }
8567     }
8568   }
8569
8570   // fold (sext x) -> (zext x) if the sign bit is known zero.
8571   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8572       DAG.SignBitIsZero(N0))
8573     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8574
8575   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8576     return NewVSel;
8577
8578   return SDValue();
8579 }
8580
8581 // isTruncateOf - Return true if N acts as a truncate of some wider value and
8582 // store that value in Op.  N is either a real TRUNCATE, or an i1
8583 // (setne X, 0) where every bit of X except bit 0 is known to be zero.  The
8584 // known bits of Op are computed into Known so that the caller does not have
8585 // to call computeKnownBits again.  Op and Known may be written even when the
8586 // result is false.
8587 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8588                          KnownBits &Known) {
8589   if (N->getOpcode() == ISD::TRUNCATE) {
8590     Op = N->getOperand(0);
8591     DAG.computeKnownBits(Op, Known);
8592     return true;
8593   }
8594
8595   // Otherwise N has to be an i1 "not equal" comparison.
8596   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1)
8597     return false;
8598   if (cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
8599     return false;
8600
8601   SDValue Op0 = N->getOperand(0);
8602   SDValue Op1 = N->getOperand(1);
8603   assert(Op0.getValueType() == Op1.getValueType());
8604
8605   // One side must be the constant zero; the other side is the candidate.
8606   bool ZeroOnLeft = isNullConstant(Op0);
8607   if (!ZeroOnLeft && !isNullConstant(Op1))
8608     return false;
8609   Op = ZeroOnLeft ? Op1 : Op0;
8610
8611   // x != 0 is a truncate to i1 only if no bit other than bit 0 can be set.
8612   DAG.computeKnownBits(Op, Known);
8613   return (Known.Zero | 1).isAllOnesValue();
8614 }
8615
     // Combine a ZERO_EXTEND node.  The folds below are tried in order and the
     // first one that applies decides the result.  Returns the replacement value,
     // SDValue(N, 0) when the DAG was already updated through CombineTo, or an
     // empty SDValue when no fold applies.
8616 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8617   SDValue N0 = N->getOperand(0);
8618   EVT VT = N->getValueType(0);
8619
       // A non-null result from tryToFoldExtendOfConstant replaces N outright.
8620   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8621                                               LegalOperations))
8622     return SDValue(Res, 0);
8623
8624   // fold (zext (zext x)) -> (zext x)
8625   // fold (zext (aext x)) -> (zext x)
8626   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8627     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8628                        N0.getOperand(0));
8629
8630   // fold (zext (truncate x)) -> (zext x) or
8631   //      (zext (truncate x)) -> (truncate x)
8632   // This is valid when the truncated bits of x are already zero.
8633   // FIXME: We should extend this to work for vectors too.
8634   SDValue Op;
8635   KnownBits Known;
8636   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
         // TruncatedBits covers the bits of Op from N0's width up to
         // min(Op width, VT width); it is empty when Op and N0 are equally wide.
8637     APInt TruncatedBits =
8638       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
8639       APInt(Op.getValueSizeInBits(), 0) :
8640       APInt::getBitsSet(Op.getValueSizeInBits(),
8641                         N0.getValueSizeInBits(),
8642                         std::min(Op.getValueSizeInBits(),
8643                                  VT.getSizeInBits()));
8644     if (TruncatedBits.isSubsetOf(Known.Zero))
8645       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8646   }
8647
8648   // fold (zext (truncate x)) -> (and x, mask)
8649   if (N0.getOpcode() == ISD::TRUNCATE) {
8650     // fold (zext (truncate (load x))) -> (zext (smaller load x))
8651     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8652     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Keep the truncate's source node so it can be requeued after CombineTo.
8653       SDNode *oye = N0.getOperand(0).getNode();
8654       if (NarrowLoad.getNode() != N0.getNode()) {
8655         CombineTo(N0.getNode(), NarrowLoad);
8656         // CombineTo deleted the truncate, if needed, but not what's under it.
8657         AddToWorklist(oye);
8658       }
8659       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8660     }
8661
8662     EVT SrcVT = N0.getOperand(0).getValueType();
8663     EVT MinVT = N0.getValueType();
8664
8665     // Try to mask before the extension to avoid having to generate a larger mask,
8666     // possibly over several sub-vectors.
8667     if (SrcVT.bitsLT(VT) && VT.isVector()) {
8668       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8669                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
8670         SDValue Op = N0.getOperand(0);
8671         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8672         AddToWorklist(Op.getNode());
8673         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8674         // Transfer the debug info; the new node is equivalent to N0.
8675         DAG.transferDbgValues(N0, ZExtOrTrunc);
8676         return ZExtOrTrunc;
8677       }
8678     }
8679
         // General case: resize x to VT first, then mask it down to the width of
         // the truncated type.
8680     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8681       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8682       AddToWorklist(Op.getNode());
8683       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8684       // We may safely transfer the debug info describing the truncate node over
8685       // to the equivalent and operation.
8686       DAG.transferDbgValues(N0, And);
8687       return And;
8688     }
8689   }
8690
8691   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8692   // if either of the casts is not free.
8693   if (N0.getOpcode() == ISD::AND &&
8694       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8695       N0.getOperand(1).getOpcode() == ISD::Constant &&
8696       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8697                            N0.getValueType()) ||
8698        !TLI.isZExtFree(N0.getValueType(), VT))) {
8699     SDValue X = N0.getOperand(0).getOperand(0);
8700     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
8701     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8702     Mask = Mask.zext(VT.getSizeInBits());
8703     SDLoc DL(N);
8704     return DAG.getNode(ISD::AND, DL, VT,
8705                        X, DAG.getConstant(Mask, DL, VT));
8706   }
8707
8708   // Try to simplify (zext (load x)).
8709   if (SDValue foldedExt =
8710           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8711                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8712     return foldedExt;
8713
8714   // fold (zext (load x)) to multiple smaller zextloads.
8715   // Only on illegal but splittable vectors.
8716   if (SDValue ExtLoad = CombineExtLoad(N))
8717     return ExtLoad;
8718
8719   // fold (zext (and/or/xor (load x), cst)) ->
8720   //      (and/or/xor (zextload x), (zext cst))
8721   // Unless (and (load x) cst) will match as a zextload already and has
8722   // additional users.
       // This fold only runs before operation legalization, and only when the
       // logic op is legal in the wide type VT.
8723   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8724        N0.getOpcode() == ISD::XOR) &&
8725       isa<LoadSDNode>(N0.getOperand(0)) &&
8726       N0.getOperand(1).getOpcode() == ISD::Constant &&
8727       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8728     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8729     EVT MemVT = LN00->getMemoryVT();
8730     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8731         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8732       bool DoXform = true;
8733       SmallVector<SDNode*, 4> SetCCs;
8734       if (!N0.hasOneUse()) {
8735         if (N0.getOpcode() == ISD::AND) {
8736           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8737           EVT LoadResultTy = AndC->getValueType(0);
8738           EVT ExtVT;
8739           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8740             DoXform = false;
8741         }
8742       }
8743       if (DoXform)
8744         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8745                                           ISD::ZERO_EXTEND, SetCCs, TLI);
8746       if (DoXform) {
8747         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8748                                          LN00->getChain(), LN00->getBasePtr(),
8749                                          LN00->getMemoryVT(),
8750                                          LN00->getMemOperand());
8751         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8752         Mask = Mask.zext(VT.getSizeInBits());
8753         SDLoc DL(N);
8754         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8755                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8756         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
             // Both use counts are sampled before CombineTo(N, And) runs.
             // Despite its name, NoReplaceTruncAnd is true when N0 has users
             // other than N; those users receive a truncate of the wide op.
8757         bool NoReplaceTruncAnd = !N0.hasOneUse();
             // True when the load value has a single user, in which case only
             // the chain result of the old load needs to be redirected.
8758         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8759         CombineTo(N, And);
8760         // If N0 has multiple uses, change other uses as well.
8761         if (NoReplaceTruncAnd) {
8762           SDValue TruncAnd =
8763               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8764           CombineTo(N0.getNode(), TruncAnd);
8765         }
8766         if (NoReplaceTrunc) {
8767           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8768         } else {
8769           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8770                                       LN00->getValueType(0), ExtLoad);
8771           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8772         }
8773         return SDValue(N,0); // Return N so it doesn't get rechecked!
8774       }
8775     }
8776   }
8777
8778   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8779   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8780   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
8781     return ZExtLoad;
8782
8783   // Try to simplify (zext (zextload x)).
8784   if (SDValue foldedExt = tryToFoldExtOfExtload(
8785           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
8786     return foldedExt;
8787
8788   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8789     return V;
8790
8791   if (N0.getOpcode() == ISD::SETCC) {
8792     // Only do this before legalize for now.
8793     if (!LegalOperations && VT.isVector() &&
8794         N0.getValueType().getVectorElementType() == MVT::i1) {
8795       EVT N00VT = N0.getOperand(0).getValueType();
           // Nothing to do if the setcc already has the result type that
           // getSetCCResultType reports for its operands.
8796       if (getSetCCResultType(N00VT) == N0.getValueType())
8797         return SDValue();
8798
8799       // We know that the # elements of the results is the same as the #
8800       // elements of the compare (and the # elements of the compare result for
8801       // that matter). Check to see that they are the same size. If so, we know
8802       // that the element size of the zext'd result matches the element size of
8803       // the compare operands.
8804       SDLoc DL(N);
8805       SDValue VecOnes = DAG.getConstant(1, DL, VT);
8806       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
8807         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
8808         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
8809                                      N0.getOperand(1), N0.getOperand(2));
8810         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
8811       }
8812
8813       // If the desired elements are smaller or larger than the source
8814       // elements we can use a matching integer vector type and then
8815       // truncate/sign extend.
8816       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8817       SDValue VsetCC =
8818           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
8819                       N0.getOperand(1), N0.getOperand(2));
8820       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
8821                          VecOnes);
8822     }
8823
8824     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8825     SDLoc DL(N);
8826     if (SDValue SCC = SimplifySelectCC(
8827             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8828             DAG.getConstant(0, DL, VT),
8829             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8830       return SCC;
8831   }
8832
8833   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
8834   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
8835       isa<ConstantSDNode>(N0.getOperand(1)) &&
8836       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
8837       N0.hasOneUse()) {
8838     SDValue ShAmt = N0.getOperand(1);
8839     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8840     if (N0.getOpcode() == ISD::SHL) {
8841       SDValue InnerZExt = N0.getOperand(0);
8842       // If the original shl may be shifting out bits, do not perform this
8843       // transformation.
           // The inner zext guarantees this many zero bits at the top of its
           // result, so a shift by at most that amount drops no set bit.
8844       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
8845         InnerZExt.getOperand(0).getValueSizeInBits();
8846       if (ShAmtVal > KnownZeroBits)
8847         return SDValue();
8848     }
8849
8850     SDLoc DL(N);
8851
8852     // Ensure that the shift amount is wide enough for the shifted value.
8853     if (VT.getSizeInBits() >= 256)
8854       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
8855
8856     return DAG.getNode(N0.getOpcode(), DL, VT,
8857                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
8858                        ShAmt);
8859   }
8860
8861   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8862     return NewVSel;
8863
8864   return SDValue();
8865 }
8866
8867 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
       // Combine the ANY_EXTEND node N. Returns the replacement value,
       // SDValue(N, 0) when a fold already updated the DAG in place, or an empty
       // SDValue when none of the folds below applies.
8868   SDValue N0 = N->getOperand(0);
8869   EVT VT = N->getValueType(0);
8870
       // Offer N to tryToFoldExtendOfConstant first; a non-null node is the result.
8871   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8872                                               LegalOperations))
8873     return SDValue(Res, 0);
8874
8875   // fold (aext (aext x)) -> (aext x)
8876   // fold (aext (zext x)) -> (zext x)
8877   // fold (aext (sext x)) -> (sext x)
       // The inner extend is re-created so that it produces VT directly.
8878   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
8879       N0.getOpcode() == ISD::ZERO_EXTEND ||
8880       N0.getOpcode() == ISD::SIGN_EXTEND)
8881     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8882
8883   // fold (aext (truncate (load x))) -> (aext (smaller load x))
8884   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8885   if (N0.getOpcode() == ISD::TRUNCATE) {
8886     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
           // Remember the truncate's input before the truncate may be replaced.
8887       SDNode *oye = N0.getOperand(0).getNode();
8888       if (NarrowLoad.getNode() != N0.getNode()) {
8889         CombineTo(N0.getNode(), NarrowLoad);
8890         // CombineTo deleted the truncate, if needed, but not what's under it.
8891         AddToWorklist(oye);
8892       }
8893       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8894     }
8895   }
8896
8897   // fold (aext (truncate x)): convert x straight to VT with getAnyExtOrTrunc.
8898   if (N0.getOpcode() == ISD::TRUNCATE)
8899     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8900
8901   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8902   // if the trunc is not free.
8903   if (N0.getOpcode() == ISD::AND &&
8904       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8905       N0.getOperand(1).getOpcode() == ISD::Constant &&
8906       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8907                           N0.getValueType())) {
8908     SDLoc DL(N);
         // Bring the untruncated x to VT, then apply the mask there.
8909     SDValue X = N0.getOperand(0).getOperand(0);
8910     X = DAG.getAnyExtOrTrunc(X, DL, VT);
8911     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
         // Zero-extend the mask to VT's width, so the AND clears every bit of X
         // above the original mask width.
8912     Mask = Mask.zext(VT.getSizeInBits());
8913     return DAG.getNode(ISD::AND, DL, VT,
8914                        X, DAG.getConstant(Mask, DL, VT));
8915   }
8916
8917   // fold (aext (load x)) -> (aext (truncate (extload x)))
8918   // None of the supported targets knows how to perform load and any_ext
8919   // on vectors in one instruction.  We only perform this transformation on
8920   // scalars.
8921   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
8922       ISD::isUNINDEXEDLoad(N0.getNode()) &&
8923       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
8924     bool DoXform = true;
8925     SmallVector<SDNode*, 4> SetCCs;
         // When the load has other users, ExtendUsesToFormExtLoad decides whether
         // the transform may go ahead; SetCCs is handed to it and later passed on
         // to ExtendSetCCUses.
8926     if (!N0.hasOneUse())
8927       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
8928                                         TLI);
8929     if (DoXform) {
8930       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8931       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8932                                        LN0->getChain(),
8933                                        LN0->getBasePtr(), N0.getValueType(),
8934                                        LN0->getMemOperand());
8935       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
8936       // If the load value is used only by N, replace it via CombineTo N.
           // The use count is sampled before CombineTo(N, ...) runs.
8937       bool NoReplaceTrunc = N0.hasOneUse();
8938       CombineTo(N, ExtLoad);
8939       if (NoReplaceTrunc) {
             // Only the chain result of the old load still needs redirecting.
8940         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8941       } else {
             // Other users of the old load get a truncate of the extended load,
             // and its chain users move to the new load's chain.
8942         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8943                                     N0.getValueType(), ExtLoad);
8944         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8945       }
8946       return SDValue(N, 0); // Return N so it doesn't get rechecked!
8947     }
8948   }
8949
8950   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
8951   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
8952   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
       // Only done for a single-use, unindexed extending load.
8953   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
8954       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
8955     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8956     ISD::LoadExtType ExtType = LN0->getExtensionType();
8957     EVT MemVT = LN0->getMemoryVT();
8958     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
           // Re-create the load with result type VT, keeping its extension kind
           // and memory type.
8959       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
8960                                        VT, LN0->getChain(), LN0->getBasePtr(),
8961                                        MemVT, LN0->getMemOperand());
8962       CombineTo(N, ExtLoad);
8963       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8964       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8965     }
8966   }
8967
8968   if (N0.getOpcode() == ISD::SETCC) {
8969     // For vectors:
8970     // aext(setcc) -> vsetcc
8971     // aext(setcc) -> truncate(vsetcc)
8972     // aext(setcc) -> aext(vsetcc)
8973     // Only do this before legalize for now.
8974     if (VT.isVector() && !LegalOperations) {
8975       EVT N00VT = N0.getOperand(0).getValueType();
           // Leave the node alone when the setcc already has the result type that
           // getSetCCResultType reports for its operand type.
8976       if (getSetCCResultType(N00VT) == N0.getValueType())
8977         return SDValue();
8978
8979       // We know that the # elements of the results is the same as the
8980       // # elements of the compare (and the # elements of the compare result
8981       // for that matter).  Check to see that they are the same size.  If so,
8982       // we know that the element size of the aext'd result matches the
8983       // element size of the compare operands.
8984       if (VT.getSizeInBits() == N00VT.getSizeInBits())
8985         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
8986                              N0.getOperand(1),
8987                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
8988       // If the desired elements are smaller or larger than the source
8989       // elements we can use a matching integer vector type and then
8990       // truncate/any extend
8991       else {
8992         EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8993         SDValue VsetCC =
8994           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
8995                         N0.getOperand(1),
8996                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
8997         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
8998       }
8999     }
9000
9001     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
         // Used only if SimplifySelectCC returns a value.
9002     SDLoc DL(N);
9003     if (SDValue SCC = SimplifySelectCC(
9004             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9005             DAG.getConstant(0, DL, VT),
9006             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9007       return SCC;
9008   }
9009
9010   return SDValue();
9011 }
9012
9013 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9014   const unsigned AssertOpc = N->getOpcode();
9015   SDValue N0 = N->getOperand(0);
9016   EVT OuterVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9017
9018   // A second identical assert (same opcode, same asserted type) is redundant:
9019   //   (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9020   if (N0.getOpcode() == AssertOpc &&
9021       cast<VTSDNode>(N0.getOperand(1))->getVT() == OuterVT)
9022     return N0;
9023
9024   // Otherwise only assert (single-use trunc (assert X)) is handled.
9025   if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
9026     return SDValue();
9027   SDValue BigA = N0.getOperand(0);
9028   if (BigA.getOpcode() != AssertOpc)
9029     return SDValue();
9030
9031   // Collapse the sandwich into a single assert on the wide source that uses
9032   // the narrower of the two asserted types, then truncate the result:
9033   // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9034   // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9035   EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9036   assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9037          "Asserting zero/sign-extended bits to a type larger than the "
9038          "truncated destination does not provide information");
9039
9040   SDLoc DL(N);
9041   SDValue NarrowVTVal =
9042       DAG.getValueType(OuterVT.bitsLT(BigA_AssertVT) ? OuterVT : BigA_AssertVT);
9043   SDValue Merged = DAG.getNode(AssertOpc, DL, BigA.getValueType(),
9044                                BigA.getOperand(0), NarrowVTVal);
9045   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Merged);
9046 }
9047
9048 /// If the result of a wider load is shifted right by N bits and then
9049 /// truncated to a narrower type, and N is a multiple of the number of bits of
9050 /// the narrower type, transform it to a narrower load at a byte offset from
9051 /// the original address. Also narrow the load if the result is masked with an
9052 /// AND to effectively produce a smaller type. If the result is to be extended,
9053 /// also fold the extension to form an extending load.
     ///
     /// SIGN_EXTEND_INREG, SRL and AND nodes are special-cased below; for any
     /// other opcode the narrow type is simply N's own value type.
     ///
     /// Returns the new load (shifted left again when a SHL was folded through),
     /// or an empty SDValue when the transform does not apply. All bail-outs
     /// happen before any node is created or replaced.
9054 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9055   unsigned Opc = N->getOpcode();
9056
       // ExtType is the extension kind of the load to create, ExtVT the type it
       // reads from memory.
9057   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9058   SDValue N0 = N->getOperand(0);
9059   EVT VT = N->getValueType(0);
9060   EVT ExtVT = VT;
9061
9062   // This transformation isn't valid for vector loads.
9063   if (VT.isVector())
9064     return SDValue();
9065
9066   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9067   // extended to VT.
9068   if (Opc == ISD::SIGN_EXTEND_INREG) {
9069     ExtType = ISD::SEXTLOAD;
9070     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9071   } else if (Opc == ISD::SRL) {
9072     // Another special-case: SRL is basically zero-extending a narrower value,
9073     // or it maybe shifting a higher subword, half or byte into the lowest
9074     // bits.
9075     ExtType = ISD::ZEXTLOAD;
         // Treat the SRL itself as N0 so that the shift handling below sees it.
9076     N0 = SDValue(N, 0);
9077
9078     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9079     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9080     if (!N01 || !LN0)
9081       return SDValue();
9082
         // The narrow type covers the bits left after the shift: counted from the
         // memory width when the load is not sign-extending and the shift stays
         // inside it, otherwise from the width of VT.
9083     uint64_t ShiftAmt = N01->getZExtValue();
9084     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9085     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9086       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9087     else
9088       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9089                                 VT.getSizeInBits() - ShiftAmt);
9090   } else if (Opc == ISD::AND) {
9091     // An AND with a constant mask is the same as a truncate + zero-extend.
9092     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9093     if (!AndC || !AndC->getAPIntValue().isMask())
9094       return SDValue();
9095
         // The number of trailing ones in the mask is the width of the narrow type.
9096     unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
9097     ExtType = ISD::ZEXTLOAD;
9098     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9099   }
9100
       // ShAmt is the right-shift amount in bits that gets folded into the load
       // address (0 when there is no such shift).
9101   unsigned ShAmt = 0;
9102   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9103     SDValue SRL = N0;
9104     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9105       ShAmt = ConstShift->getZExtValue();
9106       unsigned EVTBits = ExtVT.getSizeInBits();
9107       // Is the shift amount a multiple of the size of ExtVT?
           // (The mask test equals a multiple-of test only when EVTBits is a
           // power of two.)
9108       if ((ShAmt & (EVTBits-1)) == 0) {
9109         N0 = N0.getOperand(0);
9110         // Is the width of the shifted value a multiple of the size of ExtVT?
9111         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9112           return SDValue();
9113       }
9114
9115       // At this point, we must have a load or else we can't do the transform.
9116       if (!isa<LoadSDNode>(N0)) return SDValue();
9117
9118       auto *LN0 = cast<LoadSDNode>(N0);
9119
9120       // Because a SRL must be assumed to *need* to zero-extend the high bits
9121       // (as opposed to anyext the high bits), we can't combine the zextload
9122       // lowering of SRL and an sextload.
9123       if (LN0->getExtensionType() == ISD::SEXTLOAD)
9124         return SDValue();
9125
9126       // If the shift amount is larger than the input type then we're not
9127       // accessing any of the loaded bytes.  If the load was a zextload/extload
9128       // then the result of the shift+trunc is zero/undef (handled elsewhere).
9129       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9130         return SDValue();
9131
9132       // If the SRL is only used by a masking AND, we may be able to adjust
9133       // the ExtVT to make the AND redundant.
           // (The enclosing hasOneUse() check makes the first use the only one.)
9134       SDNode *Mask = *(SRL->use_begin());
9135       if (Mask->getOpcode() == ISD::AND &&
9136           isa<ConstantSDNode>(Mask->getOperand(1))) {
9137         const APInt &ShiftMask =
9138           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9139         if (ShiftMask.isMask()) {
9140           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9141                                            ShiftMask.countTrailingOnes());
9142           // If the mask is smaller, recompute the type.
9143           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9144               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9145             ExtVT = MaskedVT;
9146         }
9147       }
9148     }
9149   }
9150
9151   // If the load is shifted left (and the result isn't shifted back right),
9152   // we can fold the truncate through the shift.
       // ShLeftAmt remembers the shift so it can be re-applied to the new load.
9153   unsigned ShLeftAmt = 0;
9154   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9155       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9156     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9157       ShLeftAmt = N01->getZExtValue();
9158       N0 = N0.getOperand(0);
9159     }
9160   }
9161
9162   // If we haven't found a load, we can't narrow it.
9163   if (!isa<LoadSDNode>(N0))
9164     return SDValue();
9165
9166   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9167   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9168     return SDValue();
9169
9170   // For big endian targets, we need to adjust the offset to the pointer to
9171   // load the correct bytes.
9172   if (DAG.getDataLayout().isBigEndian()) {
9173     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9174     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9175     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
9176   }
9177
       // Turn the bit offset into a byte offset from the original base pointer,
       // and derive the new alignment from the old alignment and that offset.
9178   EVT PtrType = N0.getOperand(1).getValueType();
9179   uint64_t PtrOff = ShAmt / 8;
9180   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9181   SDLoc DL(LN0);
9182   // The original load itself didn't wrap, so an offset within it doesn't.
9183   SDNodeFlags Flags;
9184   Flags.setNoUnsignedWrap(true);
9185   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9186                                PtrType, LN0->getBasePtr(),
9187                                DAG.getConstant(PtrOff, DL, PtrType),
9188                                Flags);
9189   AddToWorklist(NewPtr.getNode());
9190
       // Build the narrow load: a plain load of VT, or an extending load that
       // reads ExtVT from memory and produces VT.
9191   SDValue Load;
9192   if (ExtType == ISD::NON_EXTLOAD)
9193     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9194                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9195                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9196   else
9197     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9198                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9199                           NewAlign, LN0->getMemOperand()->getFlags(),
9200                           LN0->getAAInfo());
9201
9202   // Replace the old load's chain with the new load's chain.
9203   WorklistRemover DeadNodes(*this);
9204   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9205
9206   // Shift the result left, if we've swallowed a left shift.
9207   SDValue Result = Load;
9208   if (ShLeftAmt != 0) {
         // Fall back to VT for the shift-amount constant if the amount does not
         // fit in the preferred shift-amount type.
9209     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9210     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9211       ShImmTy = VT;
9212     // If the shift amount is as large as the result size (but, presumably,
9213     // no larger than the source) then the useful bits of the result are
9214     // zero; we can't simply return the shortened shift, because the result
9215     // of that operation is undefined.
9216     SDLoc DL(N0);
9217     if (ShLeftAmt >= VT.getSizeInBits())
9218       Result = DAG.getConstant(0, DL, VT);
9219     else
9220       Result = DAG.getNode(ISD::SHL, DL, VT,
9221                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9222   }
9223
9224   // Return the new loaded value.
9225   return Result;
9226 }
9227
9228 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
       // Combine the SIGN_EXTEND_INREG node N, whose operand 1 (N1) is the
       // VTSDNode naming the type that is sign-extended from. Returns the
       // replacement value, SDValue(N, 0) when the DAG was already updated in
       // place, or an empty SDValue when no fold applies.
9229   SDValue N0 = N->getOperand(0);
9230   SDValue N1 = N->getOperand(1);
9231   EVT VT = N->getValueType(0);
       // Named ExtVT (not EVT) so that the local does not shadow the EVT type.
9232   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
9233   unsigned VTBits = VT.getScalarSizeInBits();
9234   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
9235
9236   if (N0.isUndef())
9237     return DAG.getUNDEF(VT);
9238
9239   // fold (sext_in_reg c1) -> c1
       // NOTE(review): this re-creates the node on a constant operand and relies
       // on getNode to fold it -- confirm.
9240   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9241     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9242
9243   // If the input is already sign extended, just drop the extension.
9244   if (DAG.ComputeNumSignBits(N0) >= VTBits-ExtVTBits+1)
9245     return N0;
9246
9247   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9248   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9249       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9250     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9251                        N0.getOperand(0), N1);
9252
9253   // fold (sext_in_reg (sext x)) -> (sext x)
9254   // fold (sext_in_reg (aext x)) -> (sext x)
9255   // if x is small enough.
9256   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9257     SDValue N00 = N0.getOperand(0);
         // SIGN_EXTEND is a unary node: build it from x alone. The VT operand N1
         // belongs to SIGN_EXTEND_INREG only and must not be passed along.
9258     if (N00.getScalarValueSizeInBits() <= ExtVTBits &&
9259         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9260       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9261   }
9262
9263   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9264   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9265        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9266        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9267       N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
9268     if (!LegalOperations ||
9269         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9270       return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
9271   }
9272
9273   // fold (sext_in_reg (zext x)) -> (sext x)
9274   // iff we are extending the source sign bit.
9275   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9276     SDValue N00 = N0.getOperand(0);
         // As above, the SIGN_EXTEND node takes x as its only operand.
9277     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
9278         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9279       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9280   }
9281
9282   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9283   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
9284     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT.getScalarType());
9285
9286   // fold operands of sext_in_reg based on knowledge that the top bits are not
9287   // demanded.
9288   if (SimplifyDemandedBits(SDValue(N, 0)))
9289     return SDValue(N, 0);
9290
9291   // fold (sext_in_reg (load x)) -> (smaller sextload x)
9292   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9293   if (SDValue NarrowLoad = ReduceLoadWidth(N))
9294     return NarrowLoad;
9295
9296   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9297   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9298   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9299   if (N0.getOpcode() == ISD::SRL) {
9300     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9301       if (ShAmt->getZExtValue()+ExtVTBits <= VTBits) {
9302         // We can turn this into an SRA iff the input to the SRL is already sign
9303         // extended enough.
9304         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9305         if (VTBits-(ShAmt->getZExtValue()+ExtVTBits) < InSignBits)
9306           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9307                              N0.getOperand(0), N0.getOperand(1));
9308       }
9309   }
9310
9311   // fold (sext_inreg (extload x)) -> (sextload x)
9312   // If sextload is not supported by target, we can only do the combine when
9313   // load has one use. Doing otherwise can block folding the extload with other
9314   // extends that the target does support.
9315   if (ISD::isEXTLoad(N0.getNode()) &&
9316       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9317       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9318       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9319         N0.hasOneUse()) ||
9320        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
9321     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9322     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9323                                      LN0->getChain(),
9324                                      LN0->getBasePtr(), ExtVT,
9325                                      LN0->getMemOperand());
         // Replace N with the new load's value, and both results of the old load
         // (value and chain) with the new load's.
9326     CombineTo(N, ExtLoad);
9327     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9328     AddToWorklist(ExtLoad.getNode());
9329     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9330   }
9331   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9332   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9333       N0.hasOneUse() &&
9334       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9335       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9336        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
9337     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9338     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9339                                      LN0->getChain(),
9340                                      LN0->getBasePtr(), ExtVT,
9341                                      LN0->getMemOperand());
9342     CombineTo(N, ExtLoad);
9343     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9344     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9345   }
9346
9347   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9348   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9349     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9350                                            N0.getOperand(1), false))
9351       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9352                          BSwap, N1);
9353   }
9354
9355   return SDValue();
9356 }
9357
9358 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9359   // An undef source yields an undef result of the node's value type.
9360   if (N->getOperand(0).isUndef())
9361     return DAG.getUNDEF(N->getValueType(0));
9362
9363   // Otherwise hand N to tryToFoldExtendOfConstant; when it produces no node,
9364   // report "no change" with an empty SDValue.
9365   SDNode *Folded =
9366       tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations);
9367   if (!Folded)
9368     return SDValue();
9369   return SDValue(Folded, 0);
9370 }
9371
9372 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9373   // An undef source yields an undef result of the node's value type.
9374   if (N->getOperand(0).isUndef())
9375     return DAG.getUNDEF(N->getValueType(0));
9376
9377   // Otherwise hand N to tryToFoldExtendOfConstant; when it produces no node,
9378   // report "no change" with an empty SDValue.
9379   SDNode *Folded =
9380       tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, LegalOperations);
9381   if (!Folded)
9382     return SDValue();
9383   return SDValue(Folded, 0);
9384 }
9385
9386 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9387   SDValue N0 = N->getOperand(0);
9388   EVT VT = N->getValueType(0);
9389   bool isLE = DAG.getDataLayout().isLittleEndian();
9390
9391   // noop truncate
9392   if (N0.getValueType() == N->getValueType(0))
9393     return N0;
9394
9395   // fold (truncate (truncate x)) -> (truncate x)
9396   if (N0.getOpcode() == ISD::TRUNCATE)
9397     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9398
9399   // fold (truncate c1) -> c1
9400   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9401     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9402     if (C.getNode() != N)
9403       return C;
9404   }
9405
9406   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9407   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9408       N0.getOpcode() == ISD::SIGN_EXTEND ||
9409       N0.getOpcode() == ISD::ANY_EXTEND) {
9410     // if the source is smaller than the dest, we still need an extend.
9411     if (N0.getOperand(0).getValueType().bitsLT(VT))
9412       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9413     // if the source is larger than the dest, then we just need the truncate.
9414     if (N0.getOperand(0).getValueType().bitsGT(VT))
9415       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9416     // if the source and dest are the same type, we can drop both the extend
9417     // and the truncate.
9418     return N0.getOperand(0);
9419   }
9420
9421   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9422   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9423     return SDValue();
9424
9425   // Fold extract-and-trunc into a narrow extract. For example:
9426   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9427   //   i32 y = TRUNCATE(i64 x)
9428   //        -- becomes --
9429   //   v16i8 b = BITCAST (v2i64 val)
9430   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9431   //
9432   // Note: We only run this optimization after type legalization (which often
9433   // creates this pattern) and before operation legalization after which
9434   // we need to be more careful about the vector instructions that we generate.
9435   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9436       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9437     EVT VecTy = N0.getOperand(0).getValueType();
9438     EVT ExTy = N0.getValueType();
9439     EVT TrTy = N->getValueType(0);
9440
9441     unsigned NumElem = VecTy.getVectorNumElements();
9442     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9443
9444     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9445     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9446
9447     SDValue EltNo = N0->getOperand(1);
9448     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9449       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9450       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9451       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9452
9453       SDLoc DL(N);
9454       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9455                          DAG.getBitcast(NVT, N0.getOperand(0)),
9456                          DAG.getConstant(Index, DL, IndexTy));
9457     }
9458   }
9459
9460   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9461   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9462     EVT SrcVT = N0.getValueType();
9463     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9464         TLI.isTruncateFree(SrcVT, VT)) {
9465       SDLoc SL(N0);
9466       SDValue Cond = N0.getOperand(0);
9467       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9468       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9469       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9470     }
9471   }
9472
9473   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9474   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9475       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9476       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9477     SDValue Amt = N0.getOperand(1);
9478     KnownBits Known;
9479     DAG.computeKnownBits(Amt, Known);
9480     unsigned Size = VT.getScalarSizeInBits();
9481     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9482       SDLoc SL(N);
9483       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9484
9485       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9486       if (AmtVT != Amt.getValueType()) {
9487         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9488         AddToWorklist(Amt.getNode());
9489       }
9490       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9491     }
9492   }
9493
9494   // Fold a series of buildvector, bitcast, and truncate if possible.
9495   // For example fold
9496   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9497   //   (2xi32 (buildvector x, y)).
9498   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9499       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9500       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9501       N0.getOperand(0).hasOneUse()) {
9502     SDValue BuildVect = N0.getOperand(0);
9503     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9504     EVT TruncVecEltTy = VT.getVectorElementType();
9505
9506     // Check that the element types match.
9507     if (BuildVectEltTy == TruncVecEltTy) {
9508       // Now we only need to compute the offset of the truncated elements.
9509       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
9510       unsigned TruncVecNumElts = VT.getVectorNumElements();
9511       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9512
9513       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9514              "Invalid number of elements");
9515
9516       SmallVector<SDValue, 8> Opnds;
9517       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9518         Opnds.push_back(BuildVect.getOperand(i));
9519
9520       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9521     }
9522   }
9523
9524   // See if we can simplify the input to this truncate through knowledge that
9525   // only the low bits are being used.
9526   // For example "trunc (or (shl x, 8), y)" -> "trunc y"
9527   // Currently we only perform this optimization on scalars because vectors
9528   // may have different active low bits.
9529   if (!VT.isVector()) {
9530     APInt Mask =
9531         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9532     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9533       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9534   }
9535
9536   // fold (truncate (load x)) -> (smaller load x)
9537   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9538   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9539     if (SDValue Reduced = ReduceLoadWidth(N))
9540       return Reduced;
9541
9542     // Handle the case where the load remains an extending load even
9543     // after truncation.
9544     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9545       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9546       if (!LN0->isVolatile() &&
9547           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9548         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9549                                          VT, LN0->getChain(), LN0->getBasePtr(),
9550                                          LN0->getMemoryVT(),
9551                                          LN0->getMemOperand());
9552         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9553         return NewLoad;
9554       }
9555     }
9556   }
9557
9558   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
9559   // where ... are all 'undef'.
9560   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9561     SmallVector<EVT, 8> VTs;
9562     SDValue V;
9563     unsigned Idx = 0;
9564     unsigned NumDefs = 0;
9565
9566     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9567       SDValue X = N0.getOperand(i);
9568       if (!X.isUndef()) {
9569         V = X;
9570         Idx = i;
9571         NumDefs++;
9572       }
9573       // Stop if more than one member is non-undef.
9574       if (NumDefs > 1)
9575         break;
9576       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9577                                      VT.getVectorElementType(),
9578                                      X.getValueType().getVectorNumElements()));
9579     }
9580
9581     if (NumDefs == 0)
9582       return DAG.getUNDEF(VT);
9583
9584     if (NumDefs == 1) {
9585       assert(V.getNode() && "The single defined operand is empty!");
9586       SmallVector<SDValue, 8> Opnds;
9587       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9588         if (i != Idx) {
9589           Opnds.push_back(DAG.getUNDEF(VTs[i]));
9590           continue;
9591         }
9592         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9593         AddToWorklist(NV.getNode());
9594         Opnds.push_back(NV);
9595       }
9596       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9597     }
9598   }
9599
9600   // Fold truncate of a bitcast of a vector to an extract of the low vector
9601   // element.
9602   //
9603   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9604   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9605     SDValue VecSrc = N0.getOperand(0);
9606     EVT SrcVT = VecSrc.getValueType();
9607     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9608         (!LegalOperations ||
9609          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9610       SDLoc SL(N);
9611
9612       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9613       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9614       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9615                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9616     }
9617   }
9618
9619   // Simplify the operands using demanded-bits information.
9620   if (!VT.isVector() &&
9621       SimplifyDemandedBits(SDValue(N, 0)))
9622     return SDValue(N, 0);
9623
9624   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9625   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9626   // When the adde's carry is not used.
9627   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9628       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9629       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9630     SDLoc SL(N);
9631     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9632     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9633     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9634     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9635   }
9636
9637   // fold (truncate (extract_subvector(ext x))) ->
9638   //      (extract_subvector x)
9639   // TODO: This can be generalized to cover cases where the truncate and extract
9640   // do not fully cancel each other out.
9641   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9642     SDValue N00 = N0.getOperand(0);
9643     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9644         N00.getOpcode() == ISD::ZERO_EXTEND ||
9645         N00.getOpcode() == ISD::ANY_EXTEND) {
9646       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9647           VT.getVectorElementType())
9648         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9649                            N00.getOperand(0), N0.getOperand(1));
9650     }
9651   }
9652
9653   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9654     return NewVSel;
9655
9656   return SDValue();
9657 }
9658
9659 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9660   // Operand i of N; a MERGE_VALUES is looked through via its result number.
9661   const SDValue Part = N->getOperand(i);
9662   const bool IsMerge = Part.getOpcode() == ISD::MERGE_VALUES;
9663   return (IsMerge ? Part.getOperand(Part.getResNo()) : Part).getNode();
9664 }
9665
9666 /// Fold build_pair (load, load) into a single load of type \p VT.
9667 /// This only succeeds when the two load locations are consecutive.
9668 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
9669   assert(N->getOpcode() == ISD::BUILD_PAIR);
9670
9671   // Element 0 of a BUILD_PAIR is the least significant part and element 1 the
9672   // most significant one, so on big-endian targets the two loads trade places.
9673   LoadSDNode *First = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
9674   LoadSDNode *Second = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
9675   if (DAG.getDataLayout().isBigEndian())
9676     std::swap(First, Second);
9677
9678   // Both parts must be single-use, non-extending loads from one address space.
9679   if (!First || !Second || !ISD::isNON_EXTLoad(First) ||
9680       !First->hasOneUse() ||
9681       First->getAddressSpace() != Second->getAddressSpace())
9682     return SDValue();
9683   if (!ISD::isNON_EXTLoad(Second) || !Second->hasOneUse())
9684     return SDValue();
9685   // The two loads must be non-volatile and consecutive, as judged by the DAG.
9686   const unsigned FirstBytes = First->getValueType(0).getStoreSize();
9687   if (!DAG.areNonVolatileConsecutiveLoads(Second, First, FirstBytes, 1))
9688     return SDValue();
9689   // Bail unless First's alignment is at least the ABI alignment of VT.
9690   const unsigned Align = First->getAlignment();
9691   const unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
9692       VT.getTypeForEVT(*DAG.getContext()));
9693   if (NewAlign > Align ||
9694       (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)))
9695     return SDValue();
9696   return DAG.getLoad(VT, SDLoc(N), First->getChain(), First->getBasePtr(),
9697                      First->getPointerInfo(), Align);
9698 }
9699
9700 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9701   // Bitcasting ppcf128 to i128 swaps the Hi and Lo parts on little-endian
9702   // machines only, so the selector is 1 on big-endian targets and 0 otherwise.
9703   return DAG.getDataLayout().isLittleEndian() ? 0 : 1;
9704 }
9705
9706 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9707                                     const TargetLowering &TLI) {
9708   // Nothing to do unless this bitcast produces an FP type for which the
9709   // target has IEEE754-compliant (bit-preserving) FP logic.
9710   EVT VT = N->getValueType(0);
9711   if (!(VT.isFloatingPoint() && TLI.hasBitPreservingFPLogic(VT)))
9712     return SDValue();
9713
9714   // TODO: Use splat values for the constant-checking below and remove this
9715   // restriction.
9716   SDValue Logic = N->getOperand(0);
9717   EVT IntVT = Logic.getValueType();
9718   if (IntVT.isVector())
9719     return SDValue();
9720
9721   // AND with the inverted sign mask stands for FABS, XOR with the sign mask
9722   // stands for FNEG; every other opcode is rejected.
9723   // TODO: ISD::OR --> ISD::FNABS?
9724   const unsigned LogicOpc = Logic.getOpcode();
9725   if (LogicOpc != ISD::AND && LogicOpc != ISD::XOR)
9726     return SDValue();
9727   const bool IsAnd = LogicOpc == ISD::AND;
9728   const unsigned FPOpcode = IsAnd ? ISD::FABS : ISD::FNEG;
9729   APInt SignMask = APInt::getSignMask(IntVT.getSizeInBits());
9730   if (IsAnd)
9731     SignMask = ~SignMask;
9732
9733   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9734   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9735   ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Logic.getOperand(1));
9736   if (!MaskC || MaskC->getAPIntValue() != SignMask)
9737     return SDValue();
9738
9739   // The other operand has to be a bitcast whose source already has type VT.
9740   SDValue Cast = Logic.getOperand(0);
9741   if (Cast.getOpcode() != ISD::BITCAST)
9742     return SDValue();
9743   SDValue FPSrc = Cast->getOperand(0);
9744   if (FPSrc.getValueType() != VT)
9745     return SDValue();
9746   return DAG.getNode(FPOpcode, SDLoc(N), VT, FPSrc);
9747 }
9748
9749 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
       // Combine a BITCAST node. The folds below are tried in order and the first
       // one that applies returns its replacement value; an empty SDValue is
       // returned when no fold applies.
9750   SDValue N0 = N->getOperand(0);
9751   EVT VT = N->getValueType(0);
9752
       // A bitcast of undef is undef of the result type.
9753   if (N0.isUndef())
9754     return DAG.getUNDEF(VT);
9755
9756   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
9757   // Only do this before legalize, since afterward the target may be depending
9758   // on the bitconvert.
9759   // First check to see if this is all constant.
9760   if (!LegalTypes &&
9761       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
9762       VT.isVector()) {
9763     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
9764
9765     EVT DestEltVT = N->getValueType(0).getVectorElementType();
9766     assert(!DestEltVT.isVector() &&
9767            "Element type of vector ValueType must not be vector!");
9768     if (isSimple)
9769       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
9770   }
9771
9772   // If the input is a constant, let getNode fold it.
9773   // We always need to check that this is just an fp -> int or int -> fp
9774   // conversion, otherwise we will get back N, which will confuse the caller
9775   // into thinking we used CombineTo. This can block target combines from
9776   // running. If only legal operations are allowed, we need to ensure the
9777   // resulting operation will be legal.
9778   // TODO: Maybe we should check that the return value isn't N explicitly?
9779   if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
9780        (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
9781       (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
9782        (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
9783     return DAG.getBitcast(VT, N0);
9784
9785   // (conv (conv x, t1), t2) -> (conv x, t2)
9786   if (N0.getOpcode() == ISD::BITCAST)
9787     return DAG.getBitcast(VT, N0.getOperand(0));
9788
9789   // fold (conv (load x)) -> (load (conv*)x)
9790   // If the resultant load doesn't need a higher alignment than the original!
9791   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9792       // Do not change the width of a volatile load.
9793       !cast<LoadSDNode>(N0)->isVolatile() &&
9794       // Do not remove the cast if the types differ in endian layout.
9795       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
9796           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
9797       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
9798       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
9799     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9800     unsigned OrigAlign = LN0->getAlignment();
9801
         // Only rewrite the load if the target both allows an access of type VT
         // at the original alignment and reports it as fast.
9802     bool Fast = false;
9803     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9804                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
9805         Fast) {
9806       SDValue Load =
9807           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
9808                       LN0->getPointerInfo(), OrigAlign,
9809                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
           // Redirect the users of the old load's result 1 to the new load.
9810       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9811       return Load;
9812     }
9813   }
9814
9815   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
9816     return V;
9817
9818   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9819   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9820   //
9821   // For ppc_fp128:
9822   // fold (bitcast (fneg x)) ->
9823   //     flipbit = signbit
9824   //     (xor (bitcast x) (build_pair flipbit, flipbit))
9825   //
9826   // fold (bitcast (fabs x)) ->
9827   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
9828   //     (xor (bitcast x) (build_pair flipbit, flipbit))
9829   // This often reduces constant pool loads.
9830   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
9831        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
9832       N0.getNode()->hasOneUse() && VT.isInteger() &&
9833       !VT.isVector() && !N0.getValueType().isVector()) {
9834     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
9835     AddToWorklist(NewConv.getNode());
9836
9837     SDLoc DL(N);
9838     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9839       assert(VT.getSizeInBits() == 128);
           // The same i64 sign-bit pattern is applied to both halves of the pair.
9840       SDValue SignBit = DAG.getConstant(
9841           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
9842       SDValue FlipBit;
9843       if (N0.getOpcode() == ISD::FNEG) {
9844         FlipBit = SignBit;
9845         AddToWorklist(FlipBit.getNode());
9846       } else {
9847         assert(N0.getOpcode() == ISD::FABS);
             // For FABS, flip only if the Hi element currently has its sign set.
9848         SDValue Hi =
9849             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
9850                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9851                                               SDLoc(NewConv)));
9852         AddToWorklist(Hi.getNode());
9853         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
9854         AddToWorklist(FlipBit.getNode());
9855       }
9856       SDValue FlipBits =
9857           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9858       AddToWorklist(FlipBits.getNode());
9859       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
9860     }
         // Ordinary FP types: toggle (FNEG) or clear (FABS) the top bit of VT.
9861     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9862     if (N0.getOpcode() == ISD::FNEG)
9863       return DAG.getNode(ISD::XOR, DL, VT,
9864                          NewConv, DAG.getConstant(SignBit, DL, VT));
9865     assert(N0.getOpcode() == ISD::FABS);
9866     return DAG.getNode(ISD::AND, DL, VT,
9867                        NewConv, DAG.getConstant(~SignBit, DL, VT));
9868   }
9869
9870   // fold (bitconvert (fcopysign cst, x)) ->
9871   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
9872   // Note that we don't handle (copysign x, cst) because this can always be
9873   // folded to an fneg or fabs.
9874   //
9875   // For ppc_fp128:
9876   // fold (bitcast (fcopysign cst, x)) ->
9877   //     flipbit = (and (extract_element
9878   //                     (xor (bitcast cst), (bitcast x)), 0),
9879   //                    signbit)
9880   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
9881   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
9882       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
9883       VT.isInteger() && !VT.isVector()) {
9884     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
9885     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
9886     if (isTypeLegal(IntXVT)) {
9887       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
9888       AddToWorklist(X.getNode());
9889
9890       // If X has a different width than the result/lhs, sext it or truncate it.
9891       unsigned VTWidth = VT.getSizeInBits();
9892       if (OrigXWidth < VTWidth) {
9893         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
9894         AddToWorklist(X.getNode());
9895       } else if (OrigXWidth > VTWidth) {
9896         // To get the sign bit in the right place, we have to shift it right
9897         // before truncating.
9898         SDLoc DL(X);
9899         X = DAG.getNode(ISD::SRL, DL,
9900                         X.getValueType(), X,
9901                         DAG.getConstant(OrigXWidth-VTWidth, DL,
9902                                         X.getValueType()));
9903         AddToWorklist(X.getNode());
9904         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
9905         AddToWorklist(X.getNode());
9906       }
9907
9908       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9909         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
9910         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9911         AddToWorklist(Cst.getNode());
             // NOTE(review): this X shadows the width-adjusted X computed above,
             // so on this path the nodes built for the outer X are not used.
9912         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
9913         AddToWorklist(X.getNode());
             // The XOR has the sign bit set exactly where cst and x disagree.
9914         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
9915         AddToWorklist(XorResult.getNode());
9916         SDValue XorResult64 = DAG.getNode(
9917             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
9918             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9919                                   SDLoc(XorResult)));
9920         AddToWorklist(XorResult64.getNode());
9921         SDValue FlipBit =
9922             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
9923                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
9924         AddToWorklist(FlipBit.getNode());
9925         SDValue FlipBits =
9926             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9927         AddToWorklist(FlipBits.getNode());
9928         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
9929       }
           // General case: keep only the sign bit of X and everything but the
           // sign bit of the constant, then OR the two together.
9930       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9931       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
9932                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
9933       AddToWorklist(X.getNode());
9934
9935       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9936       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
9937                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
9938       AddToWorklist(Cst.getNode());
9939
9940       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
9941     }
9942   }
9943
9944   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
9945   if (N0.getOpcode() == ISD::BUILD_PAIR)
9946     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
9947       return CombineLD;
9948
9949   // Remove double bitcasts from shuffles - this is often a legacy of
9950   // XformToShuffleWithZero being used to combine bitmaskings (of
9951   // float vectors bitcast to integer vectors) into shuffles.
9952   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
9953   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
9954       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
9955       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
9956       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
9957     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
9958
9959     // If operands are a bitcast, peek through if it casts the original VT.
9960     // If operands are a constant, just bitcast back to original VT.
9961     auto PeekThroughBitcast = [&](SDValue Op) {
9962       if (Op.getOpcode() == ISD::BITCAST &&
9963           Op.getOperand(0).getValueType() == VT)
9964         return SDValue(Op.getOperand(0));
9965       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
9966           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
9967         return DAG.getBitcast(VT, Op);
9968       return SDValue();
9969     };
9970
9971     // FIXME: If either input vector is bitcast, try to convert the shuffle to
9972     // the result type of this bitcast. This would eliminate at least one
9973     // bitcast. See the transform in InstCombine.
9974     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
9975     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
         // Both shuffle inputs must be expressible in VT, otherwise give up.
9976     if (!(SV0 && SV1))
9977       return SDValue();
9978
         // Each source mask entry M expands to MaskScale consecutive entries
         // M * MaskScale + i; negative (undef) entries expand to -1.
9979     int MaskScale =
9980         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
9981     SmallVector<int, 8> NewMask;
9982     for (int M : SVN->getMask())
9983       for (int i = 0; i != MaskScale; ++i)
9984         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
9985
         // If the scaled mask is not legal as is, retry with the operands swapped
         // and the mask commuted.
9986     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9987     if (!LegalMask) {
9988       std::swap(SV0, SV1);
9989       ShuffleVectorSDNode::commuteMask(NewMask);
9990       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9991     }
9992
9993     if (LegalMask)
9994       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
9995   }
9996
9997   return SDValue();
9998 }
9999
10000 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10001   // The only combine tried for BUILD_PAIR is merging two consecutive loads.
10002   return CombineConsecutiveLoads(N, N->getValueType(0));
10003 }
10004
10005 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10006 /// operands. DstEltVT indicates the destination element value type.
      /// Returns a vector whose elements have type DstEltVT and hold the bitcast
      /// contents of BV; if BV already has that element type, BV itself is
      /// returned. Elements are converted one-to-one when the sizes match, and
      /// merged or split (honoring the target's endianness) when they differ.
10007 SDValue DAGCombiner::
10008 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10009   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10010
10011   // If this is already the right type, we're done.
10012   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10013
10014   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10015   unsigned DstBitSize = DstEltVT.getSizeInBits();
10016
10017   // If this is a conversion of N elements of one type to N elements of another
10018   // type, convert each element.  This handles FP<->INT cases.
10019   if (SrcBitSize == DstBitSize) {
10020     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10021                               BV->getValueType(0).getVectorNumElements());
10022
10023     // Due to the FP element handling below calling this routine recursively,
10024     // we can end up with a scalar-to-vector node here.
10025     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
10026       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
10027                          DAG.getBitcast(DstEltVT, BV->getOperand(0)));
10028
10029     SmallVector<SDValue, 8> Ops;
10030     for (SDValue Op : BV->op_values()) {
10031       // If the vector element type is not legal, the BUILD_VECTOR operands
10032       // are promoted and implicitly truncated.  Make that explicit here.
10033       if (Op.getValueType() != SrcEltVT)
10034         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10035       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10036       AddToWorklist(Ops.back().getNode());
10037     }
10038     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10039   }
10040
10041   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10042   // handle annoying details of growing/shrinking FP values, we convert them to
10043   // int first.
10044   if (SrcEltVT.isFloatingPoint()) {
10045     // Convert the input float vector to a int vector where the elements are the
10046     // same sizes.
10047     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10048     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10049     SrcEltVT = IntVT;
10050   }
10051
10052   // Now we know the input is an integer vector.  If the output is a FP type,
10053   // convert to integer first, then to FP of the right size.
10054   if (DstEltVT.isFloatingPoint()) {
10055     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10056     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10057
10058     // Next, convert to FP elements of the same size.
10059     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10060   }
10061
10062   SDLoc DL(BV);
10063
10064   // Okay, we know the src/dst types are both integers of differing types.
10065   // Handling growing first.
10066   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10067   if (SrcBitSize < DstBitSize) {
10068     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10069
          // Each group of NumInputsPerOutput consecutive inputs is merged into one
          // output element.
10070     SmallVector<SDValue, 8> Ops;
10071     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10072          i += NumInputsPerOutput) {
10073       bool isLE = DAG.getDataLayout().isLittleEndian();
10074       APInt NewBits = APInt(DstBitSize, 0);
10075       bool EltIsUndef = true;
            // On little-endian targets the highest-indexed input of the group is
            // merged first, so after the remaining shifts it ends up in the most
            // significant bits; big-endian targets merge in operand order.
10076       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10077         // Shift the previously computed bits over.
10078         NewBits <<= SrcBitSize;
10079         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
              // An undef input contributes zero bits to a partially defined group.
10080         if (Op.isUndef()) continue;
10081         EltIsUndef = false;
10082
10083         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10084                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10085       }
10086
            // The output is undef only if every input of its group was undef.
10087       if (EltIsUndef)
10088         Ops.push_back(DAG.getUNDEF(DstEltVT));
10089       else
10090         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10091     }
10092
10093     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10094     return DAG.getBuildVector(VT, DL, Ops);
10095   }
10096
10097   // Finally, this must be the case where we are shrinking elements: each input
10098   // turns into multiple outputs.
10099   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10100   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10101                             NumOutputsPerInput*BV->getNumOperands());
10102   SmallVector<SDValue, 8> Ops;
10103
10104   for (const SDValue &Op : BV->op_values()) {
          // An undef input becomes NumOutputsPerInput undef outputs.
10105     if (Op.isUndef()) {
10106       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10107       continue;
10108     }
10109
10110     APInt OpVal = cast<ConstantSDNode>(Op)->
10111                   getAPIntValue().zextOrTrunc(SrcBitSize);
10112
          // Peel DstBitSize-wide pieces off the low end of the value, lowest
          // piece first.
10113     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10114       APInt ThisVal = OpVal.trunc(DstBitSize);
10115       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10116       OpVal.lshrInPlace(DstBitSize);
10117     }
10118
10119     // For big endian targets, swap the order of the pieces of each element.
10120     if (DAG.getDataLayout().isBigEndian())
10121       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10122   }
10123
10124   return DAG.getBuildVector(VT, DL, Ops);
10125 }
10126
10127 static bool isContractable(SDNode *N) {
        // Contractable means: the contract or the reassociation flag is set on N.
10128   const SDNodeFlags NodeFlags = N->getFlags();
10129   return NodeFlags.hasAllowReassociation() || NodeFlags.hasAllowContract();
10130 }
10131
10132 /// Try to perform FMA combining on a given FADD node.
10133 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10134   SDValue N0 = N->getOperand(0);
10135   SDValue N1 = N->getOperand(1);
10136   EVT VT = N->getValueType(0);
10137   SDLoc SL(N);
10138
10139   const TargetOptions &Options = DAG.getTarget().Options;
10140
10141   // Floating-point multiply-add with intermediate rounding.
10142   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10143
10144   // Floating-point multiply-add without intermediate rounding.
10145   bool HasFMA =
10146       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10147       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10148
10149   // No valid opcode, do not combine.
10150   if (!HasFMAD && !HasFMA)
10151     return SDValue();
10152
10153   SDNodeFlags Flags = N->getFlags();
10154   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10155   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10156                               CanFuse || HasFMAD);
10157   // If the addition is not contractable, do not combine.
10158   if (!AllowFusionGlobally && !isContractable(N))
10159     return SDValue();
10160
10161   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10162   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10163     return SDValue();
10164
10165   // Always prefer FMAD to FMA for precision.
10166   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10167   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10168
10169   // Is the node an FMUL and contractable either due to global flags or
10170   // SDNodeFlags.
10171   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10172     if (N.getOpcode() != ISD::FMUL)
10173       return false;
10174     return AllowFusionGlobally || isContractable(N.getNode());
10175   };
10176   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10177   // prefer to fold the multiply with fewer uses.
10178   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10179     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10180       std::swap(N0, N1);
10181   }
10182
10183   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10184   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10185     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10186                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10187   }
10188
10189   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10190   // Note: Commutes FADD operands.
10191   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10192     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10193                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10194   }
10195
10196   // Look through FP_EXTEND nodes to do more combining.
10197
10198   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10199   if (N0.getOpcode() == ISD::FP_EXTEND) {
10200     SDValue N00 = N0.getOperand(0);
10201     if (isContractableFMUL(N00) &&
10202         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10203       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10204                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10205                                      N00.getOperand(0)),
10206                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10207                                      N00.getOperand(1)), N1, Flags);
10208     }
10209   }
10210
10211   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10212   // Note: Commutes FADD operands.
10213   if (N1.getOpcode() == ISD::FP_EXTEND) {
10214     SDValue N10 = N1.getOperand(0);
10215     if (isContractableFMUL(N10) &&
10216         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10217       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10218                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10219                                      N10.getOperand(0)),
10220                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10221                                      N10.getOperand(1)), N0, Flags);
10222     }
10223   }
10224
10225   // More folding opportunities when target permits.
10226   if (Aggressive) {
10227     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
10228     if (CanFuse &&
10229         N0.getOpcode() == PreferredFusedOpcode &&
10230         N0.getOperand(2).getOpcode() == ISD::FMUL &&
10231         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10232       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10233                          N0.getOperand(0), N0.getOperand(1),
10234                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10235                                      N0.getOperand(2).getOperand(0),
10236                                      N0.getOperand(2).getOperand(1),
10237                                      N1, Flags), Flags);
10238     }
10239
10240     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10241     if (CanFuse &&
10242         N1->getOpcode() == PreferredFusedOpcode &&
10243         N1.getOperand(2).getOpcode() == ISD::FMUL &&
10244         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10245       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10246                          N1.getOperand(0), N1.getOperand(1),
10247                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10248                                      N1.getOperand(2).getOperand(0),
10249                                      N1.getOperand(2).getOperand(1),
10250                                      N0, Flags), Flags);
10251     }
10252
10253
10254     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10255     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
10256     auto FoldFAddFMAFPExtFMul = [&] (
10257       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10258       SDNodeFlags Flags) {
10259       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10260                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10261                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10262                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10263                                      Z, Flags), Flags);
10264     };
10265     if (N0.getOpcode() == PreferredFusedOpcode) {
10266       SDValue N02 = N0.getOperand(2);
10267       if (N02.getOpcode() == ISD::FP_EXTEND) {
10268         SDValue N020 = N02.getOperand(0);
10269         if (isContractableFMUL(N020) &&
10270             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10271           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10272                                       N020.getOperand(0), N020.getOperand(1),
10273                                       N1, Flags);
10274         }
10275       }
10276     }
10277
10278     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10279     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10280     // FIXME: This turns two single-precision and one double-precision
10281     // operation into two double-precision operations, which might not be
10282     // interesting for all targets, especially GPUs.
10283     auto FoldFAddFPExtFMAFMul = [&] (
10284       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10285       SDNodeFlags Flags) {
10286       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10287                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10288                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10289                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10290                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10291                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10292                                      Z, Flags), Flags);
10293     };
10294     if (N0.getOpcode() == ISD::FP_EXTEND) {
10295       SDValue N00 = N0.getOperand(0);
10296       if (N00.getOpcode() == PreferredFusedOpcode) {
10297         SDValue N002 = N00.getOperand(2);
10298         if (isContractableFMUL(N002) &&
10299             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10300           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10301                                       N002.getOperand(0), N002.getOperand(1),
10302                                       N1, Flags);
10303         }
10304       }
10305     }
10306
10307     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10308     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
10309     if (N1.getOpcode() == PreferredFusedOpcode) {
10310       SDValue N12 = N1.getOperand(2);
10311       if (N12.getOpcode() == ISD::FP_EXTEND) {
10312         SDValue N120 = N12.getOperand(0);
10313         if (isContractableFMUL(N120) &&
10314             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10315           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10316                                       N120.getOperand(0), N120.getOperand(1),
10317                                       N0, Flags);
10318         }
10319       }
10320     }
10321
10322     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10323     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10324     // FIXME: This turns two single-precision and one double-precision
10325     // operation into two double-precision operations, which might not be
10326     // interesting for all targets, especially GPUs.
10327     if (N1.getOpcode() == ISD::FP_EXTEND) {
10328       SDValue N10 = N1.getOperand(0);
10329       if (N10.getOpcode() == PreferredFusedOpcode) {
10330         SDValue N102 = N10.getOperand(2);
10331         if (isContractableFMUL(N102) &&
10332             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10333           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10334                                       N102.getOperand(0), N102.getOperand(1),
10335                                       N0, Flags);
10336         }
10337       }
10338     }
10339   }
10340
10341   return SDValue();
10342 }
10343
10344 /// Try to perform FMA combining on a given FSUB node.
10345 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10346   SDValue N0 = N->getOperand(0);
10347   SDValue N1 = N->getOperand(1);
10348   EVT VT = N->getValueType(0);
10349   SDLoc SL(N);
10350
10351   const TargetOptions &Options = DAG.getTarget().Options;
10352   // Floating-point multiply-add with intermediate rounding.
10353   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10354
10355   // Floating-point multiply-add without intermediate rounding.
10356   bool HasFMA =
10357       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10358       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10359
10360   // No valid opcode, do not combine.
10361   if (!HasFMAD && !HasFMA)
10362     return SDValue();
10363
10364   const SDNodeFlags Flags = N->getFlags();
10365   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10366   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10367                               CanFuse || HasFMAD);
10368
10369   // If the subtraction is not contractable, do not combine.
10370   if (!AllowFusionGlobally && !isContractable(N))
10371     return SDValue();
10372
10373   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10374   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10375     return SDValue();
10376
10377   // Always prefer FMAD to FMA for precision.
10378   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10379   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10380
10381   // Is the node an FMUL and contractable either due to global flags or
10382   // SDNodeFlags.
10383   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10384     if (N.getOpcode() != ISD::FMUL)
10385       return false;
10386     return AllowFusionGlobally || isContractable(N.getNode());
10387   };
10388
10389   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10390   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10391     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10392                        N0.getOperand(0), N0.getOperand(1),
10393                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10394   }
10395
10396   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10397   // Note: Commutes FSUB operands.
10398   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10399     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10400                        DAG.getNode(ISD::FNEG, SL, VT,
10401                                    N1.getOperand(0)),
10402                        N1.getOperand(1), N0, Flags);
10403   }
10404
10405   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10406   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10407       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10408     SDValue N00 = N0.getOperand(0).getOperand(0);
10409     SDValue N01 = N0.getOperand(0).getOperand(1);
10410     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10411                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10412                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10413   }
10414
10415   // Look through FP_EXTEND nodes to do more combining.
10416
10417   // fold (fsub (fpext (fmul x, y)), z)
10418   //   -> (fma (fpext x), (fpext y), (fneg z))
10419   if (N0.getOpcode() == ISD::FP_EXTEND) {
10420     SDValue N00 = N0.getOperand(0);
10421     if (isContractableFMUL(N00) &&
10422         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10423       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10424                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10425                                      N00.getOperand(0)),
10426                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10427                                      N00.getOperand(1)),
10428                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10429     }
10430   }
10431
10432   // fold (fsub x, (fpext (fmul y, z)))
10433   //   -> (fma (fneg (fpext y)), (fpext z), x)
10434   // Note: Commutes FSUB operands.
10435   if (N1.getOpcode() == ISD::FP_EXTEND) {
10436     SDValue N10 = N1.getOperand(0);
10437     if (isContractableFMUL(N10) &&
10438         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10439       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10440                          DAG.getNode(ISD::FNEG, SL, VT,
10441                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10442                                                  N10.getOperand(0))),
10443                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10444                                      N10.getOperand(1)),
10445                          N0, Flags);
10446     }
10447   }
10448
10449   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10450   //   -> (fneg (fma (fpext x), (fpext y), z))
10451   // Note: This could be removed with appropriate canonicalization of the
10452   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10453   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10454   // from implementing the canonicalization in visitFSUB.
10455   if (N0.getOpcode() == ISD::FP_EXTEND) {
10456     SDValue N00 = N0.getOperand(0);
10457     if (N00.getOpcode() == ISD::FNEG) {
10458       SDValue N000 = N00.getOperand(0);
10459       if (isContractableFMUL(N000) &&
10460           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10461         return DAG.getNode(ISD::FNEG, SL, VT,
10462                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10463                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10464                                                    N000.getOperand(0)),
10465                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10466                                                    N000.getOperand(1)),
10467                                        N1, Flags));
10468       }
10469     }
10470   }
10471
10472   // fold (fsub (fneg (fpext (fmul, x, y))), z)
10473   //   -> (fneg (fma (fpext x)), (fpext y), z)
10474   // Note: This could be removed with appropriate canonicalization of the
10475   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10476   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10477   // from implementing the canonicalization in visitFSUB.
10478   if (N0.getOpcode() == ISD::FNEG) {
10479     SDValue N00 = N0.getOperand(0);
10480     if (N00.getOpcode() == ISD::FP_EXTEND) {
10481       SDValue N000 = N00.getOperand(0);
10482       if (isContractableFMUL(N000) &&
10483           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10484         return DAG.getNode(ISD::FNEG, SL, VT,
10485                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10486                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10487                                                    N000.getOperand(0)),
10488                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10489                                                    N000.getOperand(1)),
10490                                        N1, Flags));
10491       }
10492     }
10493   }
10494
10495   // More folding opportunities when target permits.
10496   if (Aggressive) {
10497     // fold (fsub (fma x, y, (fmul u, v)), z)
10498     //   -> (fma x, y (fma u, v, (fneg z)))
10499     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10500         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10501         N0.getOperand(2)->hasOneUse()) {
10502       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10503                          N0.getOperand(0), N0.getOperand(1),
10504                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10505                                      N0.getOperand(2).getOperand(0),
10506                                      N0.getOperand(2).getOperand(1),
10507                                      DAG.getNode(ISD::FNEG, SL, VT,
10508                                                  N1), Flags), Flags);
10509     }
10510
10511     // fold (fsub x, (fma y, z, (fmul u, v)))
10512     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10513     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10514         isContractableFMUL(N1.getOperand(2))) {
10515       SDValue N20 = N1.getOperand(2).getOperand(0);
10516       SDValue N21 = N1.getOperand(2).getOperand(1);
10517       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10518                          DAG.getNode(ISD::FNEG, SL, VT,
10519                                      N1.getOperand(0)),
10520                          N1.getOperand(1),
10521                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10522                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
10523                                      N21, N0, Flags), Flags);
10524     }
10525
10526
10527     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10528     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10529     if (N0.getOpcode() == PreferredFusedOpcode) {
10530       SDValue N02 = N0.getOperand(2);
10531       if (N02.getOpcode() == ISD::FP_EXTEND) {
10532         SDValue N020 = N02.getOperand(0);
10533         if (isContractableFMUL(N020) &&
10534             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10535           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10536                              N0.getOperand(0), N0.getOperand(1),
10537                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10538                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10539                                                      N020.getOperand(0)),
10540                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10541                                                      N020.getOperand(1)),
10542                                          DAG.getNode(ISD::FNEG, SL, VT,
10543                                                      N1), Flags), Flags);
10544         }
10545       }
10546     }
10547
10548     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10549     //   -> (fma (fpext x), (fpext y),
10550     //           (fma (fpext u), (fpext v), (fneg z)))
10551     // FIXME: This turns two single-precision and one double-precision
10552     // operation into two double-precision operations, which might not be
10553     // interesting for all targets, especially GPUs.
10554     if (N0.getOpcode() == ISD::FP_EXTEND) {
10555       SDValue N00 = N0.getOperand(0);
10556       if (N00.getOpcode() == PreferredFusedOpcode) {
10557         SDValue N002 = N00.getOperand(2);
10558         if (isContractableFMUL(N002) &&
10559             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10560           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10561                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10562                                          N00.getOperand(0)),
10563                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
10564                                          N00.getOperand(1)),
10565                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10566                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10567                                                      N002.getOperand(0)),
10568                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10569                                                      N002.getOperand(1)),
10570                                          DAG.getNode(ISD::FNEG, SL, VT,
10571                                                      N1), Flags), Flags);
10572         }
10573       }
10574     }
10575
10576     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10577     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10578     if (N1.getOpcode() == PreferredFusedOpcode &&
10579         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10580       SDValue N120 = N1.getOperand(2).getOperand(0);
10581       if (isContractableFMUL(N120) &&
10582           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10583         SDValue N1200 = N120.getOperand(0);
10584         SDValue N1201 = N120.getOperand(1);
10585         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10586                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10587                            N1.getOperand(1),
10588                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10589                                        DAG.getNode(ISD::FNEG, SL, VT,
10590                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10591                                                                VT, N1200)),
10592                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10593                                                    N1201),
10594                                        N0, Flags), Flags);
10595       }
10596     }
10597
10598     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10599     //   -> (fma (fneg (fpext y)), (fpext z),
10600     //           (fma (fneg (fpext u)), (fpext v), x))
10601     // FIXME: This turns two single-precision and one double-precision
10602     // operation into two double-precision operations, which might not be
10603     // interesting for all targets, especially GPUs.
10604     if (N1.getOpcode() == ISD::FP_EXTEND &&
10605         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10606       SDValue CvtSrc = N1.getOperand(0);
10607       SDValue N100 = CvtSrc.getOperand(0);
10608       SDValue N101 = CvtSrc.getOperand(1);
10609       SDValue N102 = CvtSrc.getOperand(2);
10610       if (isContractableFMUL(N102) &&
10611           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10612         SDValue N1020 = N102.getOperand(0);
10613         SDValue N1021 = N102.getOperand(1);
10614         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10615                            DAG.getNode(ISD::FNEG, SL, VT,
10616                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10617                                                    N100)),
10618                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10619                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10620                                        DAG.getNode(ISD::FNEG, SL, VT,
10621                                                    DAG.getNode(ISD::FP_EXTEND, SL,
10622                                                                VT, N1020)),
10623                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10624                                                    N1021),
10625                                        N0, Flags), Flags);
10626       }
10627     }
10628   }
10629
10630   return SDValue();
10631 }
10632
10633 /// Try to perform FMA combining on a given FMUL node based on the distributive
10634 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10635 /// subtraction instead of addition).
10636 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10637   SDValue N0 = N->getOperand(0);
10638   SDValue N1 = N->getOperand(1);
10639   EVT VT = N->getValueType(0);
10640   SDLoc SL(N);
10641   const SDNodeFlags Flags = N->getFlags();
10642
10643   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10644
10645   const TargetOptions &Options = DAG.getTarget().Options;
10646
10647   // The transforms below are incorrect when x == 0 and y == inf, because the
10648   // intermediate multiplication produces a nan.
10649   if (!Options.NoInfsFPMath)
10650     return SDValue();
10651
10652   // Floating-point multiply-add without intermediate rounding.
10653   bool HasFMA =
10654       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10655       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10656       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10657
10658   // Floating-point multiply-add with intermediate rounding. This can result
10659   // in a less precise result due to the changed rounding order.
10660   bool HasFMAD = Options.UnsafeFPMath &&
10661                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10662
10663   // No valid opcode, do not combine.
10664   if (!HasFMAD && !HasFMA)
10665     return SDValue();
10666
10667   // Always prefer FMAD to FMA for precision.
10668   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10669   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10670
10671   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
10672   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
10673   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10674     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10675       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10676       if (XC1 && XC1->isExactlyValue(+1.0))
10677         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10678                            Y, Flags);
10679       if (XC1 && XC1->isExactlyValue(-1.0))
10680         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10681                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10682     }
10683     return SDValue();
10684   };
10685
10686   if (SDValue FMA = FuseFADD(N0, N1, Flags))
10687     return FMA;
10688   if (SDValue FMA = FuseFADD(N1, N0, Flags))
10689     return FMA;
10690
10691   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
10692   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
10693   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
10694   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
10695   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10696     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
10697       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
10698       if (XC0 && XC0->isExactlyValue(+1.0))
10699         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10700                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10701                            Y, Flags);
10702       if (XC0 && XC0->isExactlyValue(-1.0))
10703         return DAG.getNode(PreferredFusedOpcode, SL, VT,
10704                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10705                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10706
10707       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10708       if (XC1 && XC1->isExactlyValue(+1.0))
10709         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10710                            DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10711       if (XC1 && XC1->isExactlyValue(-1.0))
10712         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10713                            Y, Flags);
10714     }
10715     return SDValue();
10716   };
10717
10718   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
10719     return FMA;
10720   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
10721     return FMA;
10722
10723   return SDValue();
10724 }
10725
10726 static bool isFMulNegTwo(SDValue &N) {
10727   if (N.getOpcode() != ISD::FMUL)
10728     return false;
10729   if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
10730     return CFP->isExactlyValue(-2.0);
10731   return false;
10732 }
10733
10734 SDValue DAGCombiner::visitFADD(SDNode *N) {
10735   SDValue N0 = N->getOperand(0);
10736   SDValue N1 = N->getOperand(1);
10737   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
10738   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
10739   EVT VT = N->getValueType(0);
10740   SDLoc DL(N);
10741   const TargetOptions &Options = DAG.getTarget().Options;
10742   const SDNodeFlags Flags = N->getFlags();
10743
10744   // fold vector ops
10745   if (VT.isVector())
10746     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10747       return FoldedVOp;
10748
10749   // fold (fadd c1, c2) -> c1 + c2
10750   if (N0CFP && N1CFP)
10751     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
10752
10753   // canonicalize constant to RHS
10754   if (N0CFP && !N1CFP)
10755     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
10756
10757   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
10758   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
10759   if (N1C && N1C->isZero())
10760     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
10761       return N0;
10762
10763   if (SDValue NewSel = foldBinOpIntoSelect(N))
10764     return NewSel;
10765
10766   // fold (fadd A, (fneg B)) -> (fsub A, B)
10767   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10768       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
10769     return DAG.getNode(ISD::FSUB, DL, VT, N0,
10770                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10771
10772   // fold (fadd (fneg A), B) -> (fsub B, A)
10773   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10774       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
10775     return DAG.getNode(ISD::FSUB, DL, VT, N1,
10776                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
10777
10778   // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
10779   // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
10780   if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
10781       (isFMulNegTwo(N1) && N1.hasOneUse())) {
10782     bool N1IsFMul = isFMulNegTwo(N1);
10783     SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
10784     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
10785     return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
10786   }
10787
10788   // No FP constant should be created after legalization as Instruction
10789   // Selection pass has a hard time dealing with FP constants.
10790   bool AllowNewConst = (Level < AfterLegalizeDAG);
10791
10792   // If 'unsafe math' or nnan is enabled, fold lots of things.
10793   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
10794     // If allowed, fold (fadd (fneg x), x) -> 0.0
10795     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
10796       return DAG.getConstantFP(0.0, DL, VT);
10797
10798     // If allowed, fold (fadd x, (fneg x)) -> 0.0
10799     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
10800       return DAG.getConstantFP(0.0, DL, VT);
10801   }
10802
10803   // If 'unsafe math' or reassoc and nsz, fold lots of things.
10804   // TODO: break out portions of the transformations below for which Unsafe is
10805   //       considered and which do not require both nsz and reassoc
10806   if ((Options.UnsafeFPMath ||
10807        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
10808       AllowNewConst) {
10809     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
10810     if (N1CFP && N0.getOpcode() == ISD::FADD &&
10811         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10812       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
10813       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
10814     }
10815
10816     // We can fold chains of FADD's of the same value into multiplications.
10817     // This transform is not safe in general because we are reducing the number
10818     // of rounding steps.
10819     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
10820       if (N0.getOpcode() == ISD::FMUL) {
10821         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10822         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10823
10824         // (fadd (fmul x, c), x) -> (fmul x, c+1)
10825         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10826           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10827                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10828           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10829         }
10830
10831         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10832         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10833             N1.getOperand(0) == N1.getOperand(1) &&
10834             N0.getOperand(0) == N1.getOperand(0)) {
10835           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10836                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10837           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10838         }
10839       }
10840
10841       if (N1.getOpcode() == ISD::FMUL) {
10842         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10843         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10844
10845         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10846         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10847           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10848                                        DAG.getConstantFP(1.0, DL, VT), Flags);
10849           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10850         }
10851
10852         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10853         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10854             N0.getOperand(0) == N0.getOperand(1) &&
10855             N1.getOperand(0) == N0.getOperand(0)) {
10856           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10857                                        DAG.getConstantFP(2.0, DL, VT), Flags);
10858           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10859         }
10860       }
10861
10862       if (N0.getOpcode() == ISD::FADD) {
10863         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10864         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10865         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10866             (N0.getOperand(0) == N1)) {
10867           return DAG.getNode(ISD::FMUL, DL, VT,
10868                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10869         }
10870       }
10871
10872       if (N1.getOpcode() == ISD::FADD) {
10873         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10874         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
10875         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
10876             N1.getOperand(0) == N0) {
10877           return DAG.getNode(ISD::FMUL, DL, VT,
10878                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
10879         }
10880       }
10881
10882       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
10883       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
10884           N0.getOperand(0) == N0.getOperand(1) &&
10885           N1.getOperand(0) == N1.getOperand(1) &&
10886           N0.getOperand(0) == N1.getOperand(0)) {
10887         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
10888                            DAG.getConstantFP(4.0, DL, VT), Flags);
10889       }
10890     }
10891   } // enable-unsafe-fp-math
10892
10893   // FADD -> FMA combines:
10894   if (SDValue Fused = visitFADDForFMACombine(N)) {
10895     AddToWorklist(Fused.getNode());
10896     return Fused;
10897   }
10898   return SDValue();
10899 }
10900
10901 SDValue DAGCombiner::visitFSUB(SDNode *N) {
10902   SDValue N0 = N->getOperand(0);
10903   SDValue N1 = N->getOperand(1);
10904   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10905   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10906   EVT VT = N->getValueType(0);
10907   SDLoc DL(N);
10908   const TargetOptions &Options = DAG.getTarget().Options;
10909   const SDNodeFlags Flags = N->getFlags();
10910
10911   // fold vector ops
10912   if (VT.isVector())
10913     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10914       return FoldedVOp;
10915
10916   // fold (fsub c1, c2) -> c1-c2
10917   if (N0CFP && N1CFP)
10918     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
10919
10920   if (SDValue NewSel = foldBinOpIntoSelect(N))
10921     return NewSel;
10922
10923   // (fsub A, 0) -> A
10924   if (N1CFP && N1CFP->isZero()) {
10925     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
10926         Flags.hasNoSignedZeros()) {
10927       return N0;
10928     }
10929   }
10930
10931   if (N0 == N1) {
10932     // (fsub x, x) -> 0.0
10933     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
10934       return DAG.getConstantFP(0.0f, DL, VT);
10935   }
10936
10937   // (fsub 0, B) -> -B
10938   if (N0CFP && N0CFP->isZero()) {
10939     if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
10940       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10941         return GetNegatedExpression(N1, DAG, LegalOperations);
10942       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10943         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
10944     }
10945   }
10946
10947   if ((Options.UnsafeFPMath ||
10948       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
10949       && N1.getOpcode() == ISD::FADD) {
10950     // X - (X + Y) -> -Y
10951     if (N0 == N1->getOperand(0))
10952       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
10953     // X - (Y + X) -> -Y
10954     if (N0 == N1->getOperand(1))
10955       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
10956   }
10957
10958   // fold (fsub A, (fneg B)) -> (fadd A, B)
10959   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10960     return DAG.getNode(ISD::FADD, DL, VT, N0,
10961                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10962
10963   // FSUB -> FMA combines:
10964   if (SDValue Fused = visitFSUBForFMACombine(N)) {
10965     AddToWorklist(Fused.getNode());
10966     return Fused;
10967   }
10968
10969   return SDValue();
10970 }
10971
10972 SDValue DAGCombiner::visitFMUL(SDNode *N) {
10973   SDValue N0 = N->getOperand(0);
10974   SDValue N1 = N->getOperand(1);
10975   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10976   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10977   EVT VT = N->getValueType(0);
10978   SDLoc DL(N);
10979   const TargetOptions &Options = DAG.getTarget().Options;
10980   const SDNodeFlags Flags = N->getFlags();
10981
10982   // fold vector ops
10983   if (VT.isVector()) {
10984     // This just handles C1 * C2 for vectors. Other vector folds are below.
10985     if (SDValue FoldedVOp = SimplifyVBinOp(N))
10986       return FoldedVOp;
10987   }
10988
10989   // fold (fmul c1, c2) -> c1*c2
10990   if (N0CFP && N1CFP)
10991     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
10992
10993   // canonicalize constant to RHS
10994   if (isConstantFPBuildVectorOrConstantFP(N0) &&
10995      !isConstantFPBuildVectorOrConstantFP(N1))
10996     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
10997
10998   // fold (fmul A, 1.0) -> A
10999   if (N1CFP && N1CFP->isExactlyValue(1.0))
11000     return N0;
11001
11002   if (SDValue NewSel = foldBinOpIntoSelect(N))
11003     return NewSel;
11004
11005   if (Options.UnsafeFPMath ||
11006       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11007     // fold (fmul A, 0) -> 0
11008     if (N1CFP && N1CFP->isZero())
11009       return N1;
11010   }
11011
11012   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11013     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11014     if (N0.getOpcode() == ISD::FMUL) {
11015       // Fold scalars or any vector constants (not just splats).
11016       // This fold is done in general by InstCombine, but extra fmul insts
11017       // may have been generated during lowering.
11018       SDValue N00 = N0.getOperand(0);
11019       SDValue N01 = N0.getOperand(1);
11020       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
11021       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
11022       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
11023
11024       // Check 1: Make sure that the first operand of the inner multiply is NOT
11025       // a constant. Otherwise, we may induce infinite looping.
11026       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
11027         // Check 2: Make sure that the second operand of the inner multiply and
11028         // the second operand of the outer multiply are constants.
11029         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
11030             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
11031           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11032           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11033         }
11034       }
11035     }
11036
11037     // Match a special-case: we convert X * 2.0 into fadd.
11038     // fmul (fadd X, X), C -> fmul X, 2.0 * C
11039     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11040         N0.getOperand(0) == N0.getOperand(1)) {
11041       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11042       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11043       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11044     }
11045   }
11046
11047   // fold (fmul X, 2.0) -> (fadd X, X)
11048   if (N1CFP && N1CFP->isExactlyValue(+2.0))
11049     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11050
11051   // fold (fmul X, -1.0) -> (fneg X)
11052   if (N1CFP && N1CFP->isExactlyValue(-1.0))
11053     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11054       return DAG.getNode(ISD::FNEG, DL, VT, N0);
11055
11056   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11057   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11058     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11059       // Both can be negated for free, check to see if at least one is cheaper
11060       // negated.
11061       if (LHSNeg == 2 || RHSNeg == 2)
11062         return DAG.getNode(ISD::FMUL, DL, VT,
11063                            GetNegatedExpression(N0, DAG, LegalOperations),
11064                            GetNegatedExpression(N1, DAG, LegalOperations),
11065                            Flags);
11066     }
11067   }
11068
11069   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11070   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11071   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11072       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11073       TLI.isOperationLegal(ISD::FABS, VT)) {
11074     SDValue Select = N0, X = N1;
11075     if (Select.getOpcode() != ISD::SELECT)
11076       std::swap(Select, X);
11077
11078     SDValue Cond = Select.getOperand(0);
11079     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11080     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11081
11082     if (TrueOpnd && FalseOpnd &&
11083         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11084         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11085         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11086       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11087       switch (CC) {
11088       default: break;
11089       case ISD::SETOLT:
11090       case ISD::SETULT:
11091       case ISD::SETOLE:
11092       case ISD::SETULE:
11093       case ISD::SETLT:
11094       case ISD::SETLE:
11095         std::swap(TrueOpnd, FalseOpnd);
11096         LLVM_FALLTHROUGH;
11097       case ISD::SETOGT:
11098       case ISD::SETUGT:
11099       case ISD::SETOGE:
11100       case ISD::SETUGE:
11101       case ISD::SETGT:
11102       case ISD::SETGE:
11103         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11104             TLI.isOperationLegal(ISD::FNEG, VT))
11105           return DAG.getNode(ISD::FNEG, DL, VT,
11106                    DAG.getNode(ISD::FABS, DL, VT, X));
11107         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11108           return DAG.getNode(ISD::FABS, DL, VT, X);
11109
11110         break;
11111       }
11112     }
11113   }
11114
11115   // FMUL -> FMA combines:
11116   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11117     AddToWorklist(Fused.getNode());
11118     return Fused;
11119   }
11120
11121   return SDValue();
11122 }
11123
11124 SDValue DAGCombiner::visitFMA(SDNode *N) {
11125   SDValue N0 = N->getOperand(0);
11126   SDValue N1 = N->getOperand(1);
11127   SDValue N2 = N->getOperand(2);
11128   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11129   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11130   EVT VT = N->getValueType(0);
11131   SDLoc DL(N);
11132   const TargetOptions &Options = DAG.getTarget().Options;
11133
11134   // FMA nodes have flags that propagate to the created nodes.
11135   const SDNodeFlags Flags = N->getFlags();
11136   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11137
11138   // Constant fold FMA.
11139   if (isa<ConstantFPSDNode>(N0) &&
11140       isa<ConstantFPSDNode>(N1) &&
11141       isa<ConstantFPSDNode>(N2)) {
11142     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11143   }
11144
11145   if (UnsafeFPMath) {
11146     if (N0CFP && N0CFP->isZero())
11147       return N2;
11148     if (N1CFP && N1CFP->isZero())
11149       return N2;
11150   }
11151   // TODO: The FMA node should have flags that propagate to these nodes.
11152   if (N0CFP && N0CFP->isExactlyValue(1.0))
11153     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11154   if (N1CFP && N1CFP->isExactlyValue(1.0))
11155     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11156
11157   // Canonicalize (fma c, x, y) -> (fma x, c, y)
11158   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11159      !isConstantFPBuildVectorOrConstantFP(N1))
11160     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11161
11162   if (UnsafeFPMath) {
11163     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11164     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11165         isConstantFPBuildVectorOrConstantFP(N1) &&
11166         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11167       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11168                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11169                                      Flags), Flags);
11170     }
11171
11172     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11173     if (N0.getOpcode() == ISD::FMUL &&
11174         isConstantFPBuildVectorOrConstantFP(N1) &&
11175         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11176       return DAG.getNode(ISD::FMA, DL, VT,
11177                          N0.getOperand(0),
11178                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11179                                      Flags),
11180                          N2);
11181     }
11182   }
11183
11184   // (fma x, 1, y) -> (fadd x, y)
11185   // (fma x, -1, y) -> (fadd (fneg x), y)
11186   if (N1CFP) {
11187     if (N1CFP->isExactlyValue(1.0))
11188       // TODO: The FMA node should have flags that propagate to this node.
11189       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11190
11191     if (N1CFP->isExactlyValue(-1.0) &&
11192         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11193       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11194       AddToWorklist(RHSNeg.getNode());
11195       // TODO: The FMA node should have flags that propagate to this node.
11196       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11197     }
11198
11199     // fma (fneg x), K, y -> fma x -K, y
11200     if (N0.getOpcode() == ISD::FNEG &&
11201         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11202          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11203       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11204                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11205     }
11206   }
11207
11208   if (UnsafeFPMath) {
11209     // (fma x, c, x) -> (fmul x, (c+1))
11210     if (N1CFP && N0 == N2) {
11211       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11212                          DAG.getNode(ISD::FADD, DL, VT, N1,
11213                                      DAG.getConstantFP(1.0, DL, VT), Flags),
11214                          Flags);
11215     }
11216
11217     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11218     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11219       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11220                          DAG.getNode(ISD::FADD, DL, VT, N1,
11221                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
11222                          Flags);
11223     }
11224   }
11225
11226   return SDValue();
11227 }
11228
11229 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11230 // reciprocal.
11231 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11232 // Notice that this is not always beneficial. One reason is different targets
11233 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11234 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11235 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11236 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11237   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11238   const SDNodeFlags Flags = N->getFlags();
11239   if (!UnsafeMath && !Flags.hasAllowReciprocal())
11240     return SDValue();
11241
11242   // Skip if current node is a reciprocal.
11243   SDValue N0 = N->getOperand(0);
11244   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11245   if (N0CFP && N0CFP->isExactlyValue(1.0))
11246     return SDValue();
11247
11248   // Exit early if the target does not want this transform or if there can't
11249   // possibly be enough uses of the divisor to make the transform worthwhile.
11250   SDValue N1 = N->getOperand(1);
11251   unsigned MinUses = TLI.combineRepeatedFPDivisors();
11252   if (!MinUses || N1->use_size() < MinUses)
11253     return SDValue();
11254
11255   // Find all FDIV users of the same divisor.
11256   // Use a set because duplicates may be present in the user list.
11257   SetVector<SDNode *> Users;
11258   for (auto *U : N1->uses()) {
11259     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11260       // This division is eligible for optimization only if global unsafe math
11261       // is enabled or if this division allows reciprocal formation.
11262       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11263         Users.insert(U);
11264     }
11265   }
11266
11267   // Now that we have the actual number of divisor uses, make sure it meets
11268   // the minimum threshold specified by the target.
11269   if (Users.size() < MinUses)
11270     return SDValue();
11271
11272   EVT VT = N->getValueType(0);
11273   SDLoc DL(N);
11274   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11275   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11276
11277   // Dividend / Divisor -> Dividend * Reciprocal
11278   for (auto *U : Users) {
11279     SDValue Dividend = U->getOperand(0);
11280     if (Dividend != FPOne) {
11281       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11282                                     Reciprocal, Flags);
11283       CombineTo(U, NewNode);
11284     } else if (U != Reciprocal.getNode()) {
11285       // In the absence of fast-math-flags, this user node is always the
11286       // same node as Reciprocal, but with FMF they may be different nodes.
11287       CombineTo(U, Reciprocal);
11288     }
11289   }
11290   return SDValue(N, 0);  // N was replaced.
11291 }
11292
11293 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11294   SDValue N0 = N->getOperand(0);
11295   SDValue N1 = N->getOperand(1);
11296   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11297   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11298   EVT VT = N->getValueType(0);
11299   SDLoc DL(N);
11300   const TargetOptions &Options = DAG.getTarget().Options;
11301   SDNodeFlags Flags = N->getFlags();
11302
11303   // fold vector ops
11304   if (VT.isVector())
11305     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11306       return FoldedVOp;
11307
11308   // fold (fdiv c1, c2) -> c1/c2
11309   if (N0CFP && N1CFP)
11310     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11311
11312   if (SDValue NewSel = foldBinOpIntoSelect(N))
11313     return NewSel;
11314
11315   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11316     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11317     if (N1CFP) {
11318       // Compute the reciprocal 1.0 / c2.
11319       const APFloat &N1APF = N1CFP->getValueAPF();
11320       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11321       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11322       // Only do the transform if the reciprocal is a legal fp immediate that
11323       // isn't too nasty (eg NaN, denormal, ...).
11324       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11325           (!LegalOperations ||
11326            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11327            // backend)... we should handle this gracefully after Legalize.
11328            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11329            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11330            TLI.isFPImmLegal(Recip, VT)))
11331         return DAG.getNode(ISD::FMUL, DL, VT, N0,
11332                            DAG.getConstantFP(Recip, DL, VT), Flags);
11333     }
11334
11335     // If this FDIV is part of a reciprocal square root, it may be folded
11336     // into a target-specific square root estimate instruction.
11337     if (N1.getOpcode() == ISD::FSQRT) {
11338       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11339         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11340       }
11341     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11342                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11343       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11344                                           Flags)) {
11345         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11346         AddToWorklist(RV.getNode());
11347         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11348       }
11349     } else if (N1.getOpcode() == ISD::FP_ROUND &&
11350                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11351       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11352                                           Flags)) {
11353         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11354         AddToWorklist(RV.getNode());
11355         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11356       }
11357     } else if (N1.getOpcode() == ISD::FMUL) {
11358       // Look through an FMUL. Even though this won't remove the FDIV directly,
11359       // it's still worthwhile to get rid of the FSQRT if possible.
11360       SDValue SqrtOp;
11361       SDValue OtherOp;
11362       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11363         SqrtOp = N1.getOperand(0);
11364         OtherOp = N1.getOperand(1);
11365       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11366         SqrtOp = N1.getOperand(1);
11367         OtherOp = N1.getOperand(0);
11368       }
11369       if (SqrtOp.getNode()) {
11370         // We found a FSQRT, so try to make this fold:
11371         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11372         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11373           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11374           AddToWorklist(RV.getNode());
11375           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11376         }
11377       }
11378     }
11379
11380     // Fold into a reciprocal estimate and multiply instead of a real divide.
11381     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11382       AddToWorklist(RV.getNode());
11383       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11384     }
11385   }
11386
11387   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11388   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11389     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11390       // Both can be negated for free, check to see if at least one is cheaper
11391       // negated.
11392       if (LHSNeg == 2 || RHSNeg == 2)
11393         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11394                            GetNegatedExpression(N0, DAG, LegalOperations),
11395                            GetNegatedExpression(N1, DAG, LegalOperations),
11396                            Flags);
11397     }
11398   }
11399
11400   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11401     return CombineRepeatedDivisors;
11402
11403   return SDValue();
11404 }
11405
11406 SDValue DAGCombiner::visitFREM(SDNode *N) {
11407   SDValue N0 = N->getOperand(0);
11408   SDValue N1 = N->getOperand(1);
11409   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11410   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11411   EVT VT = N->getValueType(0);
11412
11413   // fold (frem c1, c2) -> fmod(c1,c2)
11414   if (N0CFP && N1CFP)
11415     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11416
11417   if (SDValue NewSel = foldBinOpIntoSelect(N))
11418     return NewSel;
11419
11420   return SDValue();
11421 }
11422
11423 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11424   SDNodeFlags Flags = N->getFlags();
11425   if (!DAG.getTarget().Options.UnsafeFPMath &&
11426       !Flags.hasApproximateFuncs())
11427     return SDValue();
11428
11429   SDValue N0 = N->getOperand(0);
11430   if (TLI.isFsqrtCheap(N0, DAG))
11431     return SDValue();
11432
11433   // FSQRT nodes have flags that propagate to the created nodes.
11434   return buildSqrtEstimate(N0, Flags);
11435 }
11436
11437 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11438 /// copysign(x, fp_round(y)) -> copysign(x, y)
11439 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11440   SDValue N1 = N->getOperand(1);
11441   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11442        N1.getOpcode() == ISD::FP_ROUND)) {
11443     // Do not optimize out type conversion of f128 type yet.
11444     // For some targets like x86_64, configuration is changed to keep one f128
11445     // value in one SSE register, but instruction selection cannot handle
11446     // FCOPYSIGN on SSE registers yet.
11447     EVT N1VT = N1->getValueType(0);
11448     EVT N1Op0VT = N1->getOperand(0).getValueType();
11449     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11450   }
11451   return false;
11452 }
11453
11454 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11455   SDValue N0 = N->getOperand(0);
11456   SDValue N1 = N->getOperand(1);
11457   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11458   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11459   EVT VT = N->getValueType(0);
11460
11461   if (N0CFP && N1CFP) // Constant fold
11462     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11463
11464   if (N1CFP) {
11465     const APFloat &V = N1CFP->getValueAPF();
11466     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11467     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11468     if (!V.isNegative()) {
11469       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11470         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11471     } else {
11472       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11473         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11474                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11475     }
11476   }
11477
11478   // copysign(fabs(x), y) -> copysign(x, y)
11479   // copysign(fneg(x), y) -> copysign(x, y)
11480   // copysign(copysign(x,z), y) -> copysign(x, y)
11481   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11482       N0.getOpcode() == ISD::FCOPYSIGN)
11483     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11484
11485   // copysign(x, abs(y)) -> abs(x)
11486   if (N1.getOpcode() == ISD::FABS)
11487     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11488
11489   // copysign(x, copysign(y,z)) -> copysign(x, z)
11490   if (N1.getOpcode() == ISD::FCOPYSIGN)
11491     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11492
11493   // copysign(x, fp_extend(y)) -> copysign(x, y)
11494   // copysign(x, fp_round(y)) -> copysign(x, y)
11495   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11496     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11497
11498   return SDValue();
11499 }
11500
11501 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11502                                const TargetLowering &TLI) {
11503   // This optimization is guarded by a function attribute because it may produce
11504   // unexpected results. Ie, programs may be relying on the platform-specific
11505   // undefined behavior when the float-to-int conversion overflows.
11506   const Function &F = DAG.getMachineFunction().getFunction();
11507   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11508   if (StrictOverflow.getValueAsString().equals("false"))
11509     return SDValue();
11510
11511   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11512   // replacing casts with a libcall. We also must be allowed to ignore -0.0
11513   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11514   // conversions would return +0.0.
11515   // FIXME: We should be able to use node-level FMF here.
11516   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11517   EVT VT = N->getValueType(0);
11518   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11519       !DAG.getTarget().Options.NoSignedZerosFPMath)
11520     return SDValue();
11521
11522   // fptosi/fptoui round towards zero, so converting from FP to integer and
11523   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11524   SDValue N0 = N->getOperand(0);
11525   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11526       N0.getOperand(0).getValueType() == VT)
11527     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11528
11529   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11530       N0.getOperand(0).getValueType() == VT)
11531     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11532
11533   return SDValue();
11534 }
11535
11536 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
11537   SDValue N0 = N->getOperand(0);
11538   EVT VT = N->getValueType(0);
11539   EVT OpVT = N0.getValueType();
11540
11541   // fold (sint_to_fp c1) -> c1fp
11542   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11543       // ...but only if the target supports immediate floating-point values
11544       (!LegalOperations ||
11545        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11546     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11547
11548   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
11549   // but UINT_TO_FP is legal on this target, try to convert.
11550   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
11551       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
11552     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
11553     if (DAG.SignBitIsZero(N0))
11554       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11555   }
11556
11557   // The next optimizations are desirable only if SELECT_CC can be lowered.
11558   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11559     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11560     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
11561         !VT.isVector() &&
11562         (!LegalOperations ||
11563          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11564       SDLoc DL(N);
11565       SDValue Ops[] =
11566         { N0.getOperand(0), N0.getOperand(1),
11567           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11568           N0.getOperand(2) };
11569       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11570     }
11571
11572     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
11573     //      (select_cc x, y, 1.0, 0.0,, cc)
11574     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
11575         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
11576         (!LegalOperations ||
11577          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11578       SDLoc DL(N);
11579       SDValue Ops[] =
11580         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
11581           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11582           N0.getOperand(0).getOperand(2) };
11583       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11584     }
11585   }
11586
11587   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11588     return FTrunc;
11589
11590   return SDValue();
11591 }
11592
11593 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
11594   SDValue N0 = N->getOperand(0);
11595   EVT VT = N->getValueType(0);
11596   EVT OpVT = N0.getValueType();
11597
11598   // fold (uint_to_fp c1) -> c1fp
11599   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11600       // ...but only if the target supports immediate floating-point values
11601       (!LegalOperations ||
11602        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11603     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11604
11605   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
11606   // but SINT_TO_FP is legal on this target, try to convert.
11607   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
11608       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
11609     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
11610     if (DAG.SignBitIsZero(N0))
11611       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11612   }
11613
11614   // The next optimizations are desirable only if SELECT_CC can be lowered.
11615   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11616     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11617     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
11618         (!LegalOperations ||
11619          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11620       SDLoc DL(N);
11621       SDValue Ops[] =
11622         { N0.getOperand(0), N0.getOperand(1),
11623           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11624           N0.getOperand(2) };
11625       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11626     }
11627   }
11628
11629   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11630     return FTrunc;
11631
11632   return SDValue();
11633 }
11634
11635 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
11636 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
11637   SDValue N0 = N->getOperand(0);
11638   EVT VT = N->getValueType(0);
11639
11640   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
11641     return SDValue();
11642
11643   SDValue Src = N0.getOperand(0);
11644   EVT SrcVT = Src.getValueType();
11645   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
11646   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
11647
11648   // We can safely assume the conversion won't overflow the output range,
11649   // because (for example) (uint8_t)18293.f is undefined behavior.
11650
11651   // Since we can assume the conversion won't overflow, our decision as to
11652   // whether the input will fit in the float should depend on the minimum
11653   // of the input range and output range.
11654
11655   // This means this is also safe for a signed input and unsigned output, since
11656   // a negative input would lead to undefined behavior.
11657   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
11658   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
11659   unsigned ActualSize = std::min(InputSize, OutputSize);
11660   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
11661
11662   // We can only fold away the float conversion if the input range can be
11663   // represented exactly in the float range.
11664   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
11665     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
11666       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
11667                                                        : ISD::ZERO_EXTEND;
11668       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
11669     }
11670     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
11671       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
11672     return DAG.getBitcast(VT, Src);
11673   }
11674   return SDValue();
11675 }
11676
11677 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11678   SDValue N0 = N->getOperand(0);
11679   EVT VT = N->getValueType(0);
11680
11681   // fold (fp_to_sint c1fp) -> c1
11682   if (isConstantFPBuildVectorOrConstantFP(N0))
11683     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11684
11685   return FoldIntToFPToInt(N, DAG);
11686 }
11687
11688 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11689   SDValue N0 = N->getOperand(0);
11690   EVT VT = N->getValueType(0);
11691
11692   // fold (fp_to_uint c1fp) -> c1
11693   if (isConstantFPBuildVectorOrConstantFP(N0))
11694     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11695
11696   return FoldIntToFPToInt(N, DAG);
11697 }
11698
11699 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
11700   SDValue N0 = N->getOperand(0);
11701   SDValue N1 = N->getOperand(1);
11702   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11703   EVT VT = N->getValueType(0);
11704
11705   // fold (fp_round c1fp) -> c1fp
11706   if (N0CFP)
11707     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
11708
11709   // fold (fp_round (fp_extend x)) -> x
11710   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
11711     return N0.getOperand(0);
11712
11713   // fold (fp_round (fp_round x)) -> (fp_round x)
11714   if (N0.getOpcode() == ISD::FP_ROUND) {
11715     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
11716     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
11717
11718     // Skip this folding if it results in an fp_round from f80 to f16.
11719     //
11720     // f80 to f16 always generates an expensive (and as yet, unimplemented)
11721     // libcall to __truncxfhf2 instead of selecting native f16 conversion
11722     // instructions from f32 or f64.  Moreover, the first (value-preserving)
11723     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
11724     // x86.
11725     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
11726       return SDValue();
11727
11728     // If the first fp_round isn't a value preserving truncation, it might
11729     // introduce a tie in the second fp_round, that wouldn't occur in the
11730     // single-step fp_round we want to fold to.
11731     // In other words, double rounding isn't the same as rounding.
11732     // Also, this is a value preserving truncation iff both fp_round's are.
11733     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
11734       SDLoc DL(N);
11735       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
11736                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
11737     }
11738   }
11739
11740   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
11741   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
11742     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
11743                               N0.getOperand(0), N1);
11744     AddToWorklist(Tmp.getNode());
11745     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
11746                        Tmp, N0.getOperand(1));
11747   }
11748
11749   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11750     return NewVSel;
11751
11752   return SDValue();
11753 }
11754
11755 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11756   SDValue N0 = N->getOperand(0);
11757   EVT VT = N->getValueType(0);
11758   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11759   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11760
11761   // fold (fp_round_inreg c1fp) -> c1fp
11762   if (N0CFP && isTypeLegal(EVT)) {
11763     SDLoc DL(N);
11764     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11765     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11766   }
11767
11768   return SDValue();
11769 }
11770
11771 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
11772   SDValue N0 = N->getOperand(0);
11773   EVT VT = N->getValueType(0);
11774
11775   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
11776   if (N->hasOneUse() &&
11777       N->use_begin()->getOpcode() == ISD::FP_ROUND)
11778     return SDValue();
11779
11780   // fold (fp_extend c1fp) -> c1fp
11781   if (isConstantFPBuildVectorOrConstantFP(N0))
11782     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
11783
11784   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
11785   if (N0.getOpcode() == ISD::FP16_TO_FP &&
11786       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
11787     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
11788
11789   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
11790   // value of X.
11791   if (N0.getOpcode() == ISD::FP_ROUND
11792       && N0.getConstantOperandVal(1) == 1) {
11793     SDValue In = N0.getOperand(0);
11794     if (In.getValueType() == VT) return In;
11795     if (VT.bitsLT(In.getValueType()))
11796       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
11797                          In, N0.getOperand(1));
11798     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
11799   }
11800
11801   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
11802   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11803        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11804     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11805     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11806                                      LN0->getChain(),
11807                                      LN0->getBasePtr(), N0.getValueType(),
11808                                      LN0->getMemOperand());
11809     CombineTo(N, ExtLoad);
11810     CombineTo(N0.getNode(),
11811               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
11812                           N0.getValueType(), ExtLoad,
11813                           DAG.getIntPtrConstant(1, SDLoc(N0))),
11814               ExtLoad.getValue(1));
11815     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11816   }
11817
11818   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11819     return NewVSel;
11820
11821   return SDValue();
11822 }
11823
11824 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
11825   SDValue N0 = N->getOperand(0);
11826   EVT VT = N->getValueType(0);
11827
11828   // fold (fceil c1) -> fceil(c1)
11829   if (isConstantFPBuildVectorOrConstantFP(N0))
11830     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
11831
11832   return SDValue();
11833 }
11834
11835 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
11836   SDValue N0 = N->getOperand(0);
11837   EVT VT = N->getValueType(0);
11838
11839   // fold (ftrunc c1) -> ftrunc(c1)
11840   if (isConstantFPBuildVectorOrConstantFP(N0))
11841     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
11842
11843   // fold ftrunc (known rounded int x) -> x
11844   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
11845   // likely to be generated to extract integer from a rounded floating value.
11846   switch (N0.getOpcode()) {
11847   default: break;
11848   case ISD::FRINT:
11849   case ISD::FTRUNC:
11850   case ISD::FNEARBYINT:
11851   case ISD::FFLOOR:
11852   case ISD::FCEIL:
11853     return N0;
11854   }
11855
11856   return SDValue();
11857 }
11858
11859 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11860   SDValue N0 = N->getOperand(0);
11861   EVT VT = N->getValueType(0);
11862
11863   // fold (ffloor c1) -> ffloor(c1)
11864   if (isConstantFPBuildVectorOrConstantFP(N0))
11865     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11866
11867   return SDValue();
11868 }
11869
11870 // FIXME: FNEG and FABS have a lot in common; refactor.
11871 SDValue DAGCombiner::visitFNEG(SDNode *N) {
11872   SDValue N0 = N->getOperand(0);
11873   EVT VT = N->getValueType(0);
11874
11875   // Constant fold FNEG.
11876   if (isConstantFPBuildVectorOrConstantFP(N0))
11877     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
11878
11879   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
11880                          &DAG.getTarget().Options))
11881     return GetNegatedExpression(N0, DAG, LegalOperations);
11882
11883   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
11884   // constant pool values.
11885   if (!TLI.isFNegFree(VT) &&
11886       N0.getOpcode() == ISD::BITCAST &&
11887       N0.getNode()->hasOneUse()) {
11888     SDValue Int = N0.getOperand(0);
11889     EVT IntVT = Int.getValueType();
11890     if (IntVT.isInteger() && !IntVT.isVector()) {
11891       APInt SignMask;
11892       if (N0.getValueType().isVector()) {
11893         // For a vector, get a mask such as 0x80... per scalar element
11894         // and splat it.
11895         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
11896         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11897       } else {
11898         // For a scalar, just generate 0x80...
11899         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
11900       }
11901       SDLoc DL0(N0);
11902       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
11903                         DAG.getConstant(SignMask, DL0, IntVT));
11904       AddToWorklist(Int.getNode());
11905       return DAG.getBitcast(VT, Int);
11906     }
11907   }
11908
11909   // (fneg (fmul c, x)) -> (fmul -c, x)
11910   if (N0.getOpcode() == ISD::FMUL &&
11911       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
11912     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
11913     if (CFP1) {
11914       APFloat CVal = CFP1->getValueAPF();
11915       CVal.changeSign();
11916       if (Level >= AfterLegalizeDAG &&
11917           (TLI.isFPImmLegal(CVal, VT) ||
11918            TLI.isOperationLegal(ISD::ConstantFP, VT)))
11919         return DAG.getNode(
11920             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
11921             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
11922             N0->getFlags());
11923     }
11924   }
11925
11926   return SDValue();
11927 }
11928
11929 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11930   SDValue N0 = N->getOperand(0);
11931   SDValue N1 = N->getOperand(1);
11932   EVT VT = N->getValueType(0);
11933   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11934   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11935
11936   if (N0CFP && N1CFP) {
11937     const APFloat &C0 = N0CFP->getValueAPF();
11938     const APFloat &C1 = N1CFP->getValueAPF();
11939     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11940   }
11941
11942   // Canonicalize to constant on RHS.
11943   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11944      !isConstantFPBuildVectorOrConstantFP(N1))
11945     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11946
11947   return SDValue();
11948 }
11949
11950 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11951   SDValue N0 = N->getOperand(0);
11952   SDValue N1 = N->getOperand(1);
11953   EVT VT = N->getValueType(0);
11954   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11955   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11956
11957   if (N0CFP && N1CFP) {
11958     const APFloat &C0 = N0CFP->getValueAPF();
11959     const APFloat &C1 = N1CFP->getValueAPF();
11960     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11961   }
11962
11963   // Canonicalize to constant on RHS.
11964   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11965      !isConstantFPBuildVectorOrConstantFP(N1))
11966     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11967
11968   return SDValue();
11969 }
11970
11971 SDValue DAGCombiner::visitFABS(SDNode *N) {
11972   SDValue N0 = N->getOperand(0);
11973   EVT VT = N->getValueType(0);
11974
11975   // fold (fabs c1) -> fabs(c1)
11976   if (isConstantFPBuildVectorOrConstantFP(N0))
11977     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11978
11979   // fold (fabs (fabs x)) -> (fabs x)
11980   if (N0.getOpcode() == ISD::FABS)
11981     return N->getOperand(0);
11982
11983   // fold (fabs (fneg x)) -> (fabs x)
11984   // fold (fabs (fcopysign x, y)) -> (fabs x)
11985   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
11986     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
11987
11988   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
11989   // constant pool values.
11990   if (!TLI.isFAbsFree(VT) &&
11991       N0.getOpcode() == ISD::BITCAST &&
11992       N0.getNode()->hasOneUse()) {
11993     SDValue Int = N0.getOperand(0);
11994     EVT IntVT = Int.getValueType();
11995     if (IntVT.isInteger() && !IntVT.isVector()) {
11996       APInt SignMask;
11997       if (N0.getValueType().isVector()) {
11998         // For a vector, get a mask such as 0x7f... per scalar element
11999         // and splat it.
12000         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12001         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12002       } else {
12003         // For a scalar, just generate 0x7f...
12004         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12005       }
12006       SDLoc DL(N0);
12007       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12008                         DAG.getConstant(SignMask, DL, IntVT));
12009       AddToWorklist(Int.getNode());
12010       return DAG.getBitcast(N->getValueType(0), Int);
12011     }
12012   }
12013
12014   return SDValue();
12015 }
12016
12017 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12018   SDValue Chain = N->getOperand(0);
12019   SDValue N1 = N->getOperand(1);
12020   SDValue N2 = N->getOperand(2);
12021
12022   // If N is a constant we could fold this into a fallthrough or unconditional
12023   // branch. However that doesn't happen very often in normal code, because
12024   // Instcombine/SimplifyCFG should have handled the available opportunities.
12025   // If we did this folding here, it would be necessary to update the
12026   // MachineBasicBlock CFG, which is awkward.
12027
12028   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12029   // on the target.
12030   if (N1.getOpcode() == ISD::SETCC &&
12031       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12032                                    N1.getOperand(0).getValueType())) {
12033     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12034                        Chain, N1.getOperand(2),
12035                        N1.getOperand(0), N1.getOperand(1), N2);
12036   }
12037
12038   if (N1.hasOneUse()) {
12039     if (SDValue NewN1 = rebuildSetCC(N1))
12040       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12041   }
12042
12043   return SDValue();
12044 }
12045
12046 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12047   if (N.getOpcode() == ISD::SRL ||
12048       (N.getOpcode() == ISD::TRUNCATE &&
12049        (N.getOperand(0).hasOneUse() &&
12050         N.getOperand(0).getOpcode() == ISD::SRL))) {
12051     // Look pass the truncate.
12052     if (N.getOpcode() == ISD::TRUNCATE)
12053       N = N.getOperand(0);
12054
12055     // Match this pattern so that we can generate simpler code:
12056     //
12057     //   %a = ...
12058     //   %b = and i32 %a, 2
12059     //   %c = srl i32 %b, 1
12060     //   brcond i32 %c ...
12061     //
12062     // into
12063     //
12064     //   %a = ...
12065     //   %b = and i32 %a, 2
12066     //   %c = setcc eq %b, 0
12067     //   brcond %c ...
12068     //
12069     // This applies only when the AND constant value has one bit set and the
12070     // SRL constant is equal to the log2 of the AND constant. The back-end is
12071     // smart enough to convert the result into a TEST/JMP sequence.
12072     SDValue Op0 = N.getOperand(0);
12073     SDValue Op1 = N.getOperand(1);
12074
12075     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12076       SDValue AndOp1 = Op0.getOperand(1);
12077
12078       if (AndOp1.getOpcode() == ISD::Constant) {
12079         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12080
12081         if (AndConst.isPowerOf2() &&
12082             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12083           SDLoc DL(N);
12084           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12085                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12086                               ISD::SETNE);
12087         }
12088       }
12089     }
12090   }
12091
12092   // Transform br(xor(x, y)) -> br(x != y)
12093   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12094   if (N.getOpcode() == ISD::XOR) {
12095     // Because we may call this on a speculatively constructed
12096     // SimplifiedSetCC Node, we need to simplify this node first.
12097     // Ideally this should be folded into SimplifySetCC and not
12098     // here. For now, grab a handle to N so we don't lose it from
12099     // replacements interal to the visit.
12100     HandleSDNode XORHandle(N);
12101     while (N.getOpcode() == ISD::XOR) {
12102       SDValue Tmp = visitXOR(N.getNode());
12103       // No simplification done.
12104       if (!Tmp.getNode())
12105         break;
12106       // Returning N is form in-visit replacement that may invalidated
12107       // N. Grab value from Handle.
12108       if (Tmp.getNode() == N.getNode())
12109         N = XORHandle.getValue();
12110       else // Node simplified. Try simplifying again.
12111         N = Tmp;
12112     }
12113
12114     if (N.getOpcode() != ISD::XOR)
12115       return N;
12116
12117     SDNode *TheXor = N.getNode();
12118
12119     SDValue Op0 = TheXor->getOperand(0);
12120     SDValue Op1 = TheXor->getOperand(1);
12121
12122     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12123       bool Equal = false;
12124       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12125           Op0.getOpcode() == ISD::XOR) {
12126         TheXor = Op0.getNode();
12127         Equal = true;
12128       }
12129
12130       EVT SetCCVT = N.getValueType();
12131       if (LegalTypes)
12132         SetCCVT = getSetCCResultType(SetCCVT);
12133       // Replace the uses of XOR with SETCC
12134       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12135                           Equal ? ISD::SETEQ : ISD::SETNE);
12136     }
12137   }
12138
12139   return SDValue();
12140 }
12141
12142 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12143 //
12144 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12145   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12146   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12147
12148   // If N is a constant we could fold this into a fallthrough or unconditional
12149   // branch. However that doesn't happen very often in normal code, because
12150   // Instcombine/SimplifyCFG should have handled the available opportunities.
12151   // If we did this folding here, it would be necessary to update the
12152   // MachineBasicBlock CFG, which is awkward.
12153
12154   // Use SimplifySetCC to simplify SETCC's.
12155   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12156                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12157                                false);
12158   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12159
12160   // fold to a simpler setcc
12161   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12162     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12163                        N->getOperand(0), Simp.getOperand(2),
12164                        Simp.getOperand(0), Simp.getOperand(1),
12165                        N->getOperand(4));
12166
12167   return SDValue();
12168 }
12169
12170 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12171 /// and that N may be folded in the load / store addressing mode.
12172 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12173                                     SelectionDAG &DAG,
12174                                     const TargetLowering &TLI) {
12175   EVT VT;
12176   unsigned AS;
12177
12178   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12179     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12180       return false;
12181     VT = LD->getMemoryVT();
12182     AS = LD->getAddressSpace();
12183   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12184     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12185       return false;
12186     VT = ST->getMemoryVT();
12187     AS = ST->getAddressSpace();
12188   } else
12189     return false;
12190
12191   TargetLowering::AddrMode AM;
12192   if (N->getOpcode() == ISD::ADD) {
12193     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12194     if (Offset)
12195       // [reg +/- imm]
12196       AM.BaseOffs = Offset->getSExtValue();
12197     else
12198       // [reg +/- reg]
12199       AM.Scale = 1;
12200   } else if (N->getOpcode() == ISD::SUB) {
12201     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12202     if (Offset)
12203       // [reg +/- imm]
12204       AM.BaseOffs = -Offset->getSExtValue();
12205     else
12206       // [reg +/- reg]
12207       AM.Scale = 1;
12208   } else
12209     return false;
12210
12211   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12212                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12213 }
12214
12215 /// Try turning a load/store into a pre-indexed load/store when the base
12216 /// pointer is an add or subtract and it has other uses besides the load/store.
12217 /// After the transformation, the new indexed load/store has effectively folded
12218 /// the add/subtract in and all of its other uses are redirected to the
12219 /// new load/store.
12220 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12221   if (Level < AfterLegalizeDAG)
12222     return false;
12223
12224   bool isLoad = true;
12225   SDValue Ptr;
12226   EVT VT;
12227   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
12228     if (LD->isIndexed())
12229       return false;
12230     VT = LD->getMemoryVT();
12231     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12232         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12233       return false;
12234     Ptr = LD->getBasePtr();
12235   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
12236     if (ST->isIndexed())
12237       return false;
12238     VT = ST->getMemoryVT();
12239     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12240         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12241       return false;
12242     Ptr = ST->getBasePtr();
12243     isLoad = false;
12244   } else {
12245     return false;
12246   }
12247
12248   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12249   // out.  There is no reason to make this a preinc/predec.
12250   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12251       Ptr.getNode()->hasOneUse())
12252     return false;
12253
12254   // Ask the target to do addressing mode selection.
12255   SDValue BasePtr;
12256   SDValue Offset;
12257   ISD::MemIndexedMode AM = ISD::UNINDEXED;
12258   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12259     return false;
12260
12261   // Backends without true r+i pre-indexed forms may need to pass a
12262   // constant base with a variable offset so that constant coercion
12263   // will work with the patterns in canonical form.
12264   bool Swapped = false;
12265   if (isa<ConstantSDNode>(BasePtr)) {
12266     std::swap(BasePtr, Offset);
12267     Swapped = true;
12268   }
12269
12270   // Don't create a indexed load / store with zero offset.
12271   if (isNullConstant(Offset))
12272     return false;
12273
12274   // Try turning it into a pre-indexed load / store except when:
12275   // 1) The new base ptr is a frame index.
12276   // 2) If N is a store and the new base ptr is either the same as or is a
12277   //    predecessor of the value being stored.
12278   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12279   //    that would create a cycle.
12280   // 4) All uses are load / store ops that use it as old base ptr.
12281
12282   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
12283   // (plus the implicit offset) to a register to preinc anyway.
12284   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12285     return false;
12286
12287   // Check #2.
12288   if (!isLoad) {
12289     SDValue Val = cast<StoreSDNode>(N)->getValue();
12290     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12291       return false;
12292   }
12293
12294   // Caches for hasPredecessorHelper.
12295   SmallPtrSet<const SDNode *, 32> Visited;
12296   SmallVector<const SDNode *, 16> Worklist;
12297   Worklist.push_back(N);
12298
12299   // If the offset is a constant, there may be other adds of constants that
12300   // can be folded with this one. We should do this to avoid having to keep
12301   // a copy of the original base pointer.
12302   SmallVector<SDNode *, 16> OtherUses;
12303   if (isa<ConstantSDNode>(Offset))
12304     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12305                               UE = BasePtr.getNode()->use_end();
12306          UI != UE; ++UI) {
12307       SDUse &Use = UI.getUse();
12308       // Skip the use that is Ptr and uses of other results from BasePtr's
12309       // node (important for nodes that return multiple results).
12310       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12311         continue;
12312
12313       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12314         continue;
12315
12316       if (Use.getUser()->getOpcode() != ISD::ADD &&
12317           Use.getUser()->getOpcode() != ISD::SUB) {
12318         OtherUses.clear();
12319         break;
12320       }
12321
12322       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12323       if (!isa<ConstantSDNode>(Op1)) {
12324         OtherUses.clear();
12325         break;
12326       }
12327
12328       // FIXME: In some cases, we can be smarter about this.
12329       if (Op1.getValueType() != Offset.getValueType()) {
12330         OtherUses.clear();
12331         break;
12332       }
12333
12334       OtherUses.push_back(Use.getUser());
12335     }
12336
12337   if (Swapped)
12338     std::swap(BasePtr, Offset);
12339
12340   // Now check for #3 and #4.
12341   bool RealUse = false;
12342
12343   for (SDNode *Use : Ptr.getNode()->uses()) {
12344     if (Use == N)
12345       continue;
12346     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12347       return false;
12348
12349     // If Ptr may be folded in addressing mode of other use, then it's
12350     // not profitable to do this transformation.
12351     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12352       RealUse = true;
12353   }
12354
12355   if (!RealUse)
12356     return false;
12357
12358   SDValue Result;
12359   if (isLoad)
12360     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12361                                 BasePtr, Offset, AM);
12362   else
12363     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12364                                  BasePtr, Offset, AM);
12365   ++PreIndexedNodes;
12366   ++NodesCombined;
12367   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12368              Result.getNode()->dump(&DAG); dbgs() << '\n');
12369   WorklistRemover DeadNodes(*this);
12370   if (isLoad) {
12371     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12372     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12373   } else {
12374     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12375   }
12376
12377   // Finally, since the node is now dead, remove it from the graph.
12378   deleteAndRecombine(N);
12379
12380   if (Swapped)
12381     std::swap(BasePtr, Offset);
12382
12383   // Replace other uses of BasePtr that can be updated to use Ptr
12384   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
12385     unsigned OffsetIdx = 1;
12386     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12387       OffsetIdx = 0;
12388     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12389            BasePtr.getNode() && "Expected BasePtr operand");
12390
12391     // We need to replace ptr0 in the following expression:
12392     //   x0 * offset0 + y0 * ptr0 = t0
12393     // knowing that
12394     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12395     //
12396     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12397     // indexed load/store and the expression that needs to be re-written.
12398     //
12399     // Therefore, we have:
12400     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
12401
12402     ConstantSDNode *CN =
12403       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12404     int X0, X1, Y0, Y1;
12405     const APInt &Offset0 = CN->getAPIntValue();
12406     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12407
12408     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12409     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12410     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12411     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12412
12413     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12414
12415     APInt CNV = Offset0;
12416     if (X0 < 0) CNV = -CNV;
12417     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12418     else CNV = CNV - Offset1;
12419
12420     SDLoc DL(OtherUses[i]);
12421
12422     // We can now generate the new expression.
12423     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12424     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12425
12426     SDValue NewUse = DAG.getNode(Opcode,
12427                                  DL,
12428                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12429     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12430     deleteAndRecombine(OtherUses[i]);
12431   }
12432
12433   // Replace the uses of Ptr with uses of the updated base value.
12434   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12435   deleteAndRecombine(Ptr.getNode());
12436   AddToWorklist(Result.getNode());
12437
12438   return true;
12439 }
12440
12441 /// Try to combine a load/store with a add/sub of the base pointer node into a
12442 /// post-indexed load/store. The transformation folded the add/subtract into the
12443 /// new indexed load/store effectively and all of its uses are redirected to the
12444 /// new load/store.
12445 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
12446   if (Level < AfterLegalizeDAG)
12447     return false;
12448
12449   bool isLoad = true;
12450   SDValue Ptr;
12451   EVT VT;
12452   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
12453     if (LD->isIndexed())
12454       return false;
12455     VT = LD->getMemoryVT();
12456     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
12457         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
12458       return false;
12459     Ptr = LD->getBasePtr();
12460   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
12461     if (ST->isIndexed())
12462       return false;
12463     VT = ST->getMemoryVT();
12464     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
12465         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
12466       return false;
12467     Ptr = ST->getBasePtr();
12468     isLoad = false;
12469   } else {
12470     return false;
12471   }
12472
12473   if (Ptr.getNode()->hasOneUse())
12474     return false;
12475
12476   for (SDNode *Op : Ptr.getNode()->uses()) {
12477     if (Op == N ||
12478         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
12479       continue;
12480
12481     SDValue BasePtr;
12482     SDValue Offset;
12483     ISD::MemIndexedMode AM = ISD::UNINDEXED;
12484     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
12485       // Don't create a indexed load / store with zero offset.
12486       if (isNullConstant(Offset))
12487         continue;
12488
12489       // Try turning it into a post-indexed load / store except when
12490       // 1) All uses are load / store ops that use it as base ptr (and
12491       //    it may be folded as addressing mmode).
12492       // 2) Op must be independent of N, i.e. Op is neither a predecessor
12493       //    nor a successor of N. Otherwise, if Op is folded that would
12494       //    create a cycle.
12495
12496       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12497         continue;
12498
12499       // Check for #1.
12500       bool TryNext = false;
12501       for (SDNode *Use : BasePtr.getNode()->uses()) {
12502         if (Use == Ptr.getNode())
12503           continue;
12504
12505         // If all the uses are load / store addresses, then don't do the
12506         // transformation.
12507         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
12508           bool RealUse = false;
12509           for (SDNode *UseUse : Use->uses()) {
12510             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
12511               RealUse = true;
12512           }
12513
12514           if (!RealUse) {
12515             TryNext = true;
12516             break;
12517           }
12518         }
12519       }
12520
12521       if (TryNext)
12522         continue;
12523
12524       // Check for #2
12525       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
12526         SDValue Result = isLoad
12527           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12528                                BasePtr, Offset, AM)
12529           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12530                                 BasePtr, Offset, AM);
12531         ++PostIndexedNodes;
12532         ++NodesCombined;
12533         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
12534                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
12535                    dbgs() << '\n');
12536         WorklistRemover DeadNodes(*this);
12537         if (isLoad) {
12538           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12539           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12540         } else {
12541           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12542         }
12543
12544         // Finally, since the node is now dead, remove it from the graph.
12545         deleteAndRecombine(N);
12546
12547         // Replace the uses of Use with uses of the updated base value.
12548         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
12549                                       Result.getValue(isLoad ? 1 : 0));
12550         deleteAndRecombine(Op);
12551         return true;
12552       }
12553     }
12554   }
12555
12556   return false;
12557 }
12558
12559 /// Return the base-pointer arithmetic from an indexed \p LD.
12560 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12561   ISD::MemIndexedMode AM = LD->getAddressingMode();
12562   assert(AM != ISD::UNINDEXED);
12563   SDValue BP = LD->getOperand(1);
12564   SDValue Inc = LD->getOperand(2);
12565
12566   // Some backends use TargetConstants for load offsets, but don't expect
12567   // TargetConstants in general ADD nodes. We can convert these constants into
12568   // regular Constants (if the constant is not opaque).
12569   assert((Inc.getOpcode() != ISD::TargetConstant ||
12570           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12571          "Cannot split out indexing using opaque target constants");
12572   if (Inc.getOpcode() == ISD::TargetConstant) {
12573     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12574     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12575                           ConstInc->getValueType(0));
12576   }
12577
12578   unsigned Opc =
12579       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12580   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12581 }
12582
12583 SDValue DAGCombiner::visitLOAD(SDNode *N) {
12584   LoadSDNode *LD  = cast<LoadSDNode>(N);
12585   SDValue Chain = LD->getChain();
12586   SDValue Ptr   = LD->getBasePtr();
12587
12588   // If load is not volatile and there are no uses of the loaded value (and
12589   // the updated indexed value in case of indexed loads), change uses of the
12590   // chain value into uses of the chain input (i.e. delete the dead load).
12591   if (!LD->isVolatile()) {
12592     if (N->getValueType(1) == MVT::Other) {
12593       // Unindexed loads.
12594       if (!N->hasAnyUseOfValue(0)) {
12595         // It's not safe to use the two value CombineTo variant here. e.g.
12596         // v1, chain2 = load chain1, loc
12597         // v2, chain3 = load chain2, loc
12598         // v3         = add v2, c
12599         // Now we replace use of chain2 with chain1.  This makes the second load
12600         // isomorphic to the one we are deleting, and thus makes this load live.
12601         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
12602                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
12603                    dbgs() << "\n");
12604         WorklistRemover DeadNodes(*this);
12605         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12606         AddUsersToWorklist(Chain.getNode());
12607         if (N->use_empty())
12608           deleteAndRecombine(N);
12609
12610         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12611       }
12612     } else {
12613       // Indexed loads.
12614       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
12615
12616       // If this load has an opaque TargetConstant offset, then we cannot split
12617       // the indexing into an add/sub directly (that TargetConstant may not be
12618       // valid for a different type of node, and we cannot convert an opaque
12619       // target constant into a regular constant).
12620       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
12621                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
12622
12623       if (!N->hasAnyUseOfValue(0) &&
12624           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
12625         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
12626         SDValue Index;
12627         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
12628           Index = SplitIndexingFromLoad(LD);
12629           // Try to fold the base pointer arithmetic into subsequent loads and
12630           // stores.
12631           AddUsersToWorklist(N);
12632         } else
12633           Index = DAG.getUNDEF(N->getValueType(1));
12634         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
12635                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
12636                    dbgs() << " and 2 other values\n");
12637         WorklistRemover DeadNodes(*this);
12638         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
12639         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
12640         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
12641         deleteAndRecombine(N);
12642         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12643       }
12644     }
12645   }
12646
12647   // If this load is directly stored, replace the load value with the stored
12648   // value.
12649   // TODO: Handle store large -> read small portion.
12650   // TODO: Handle TRUNCSTORE/LOADEXT
12651   if (OptLevel != CodeGenOpt::None &&
12652       ISD::isNormalLoad(N) && !LD->isVolatile()) {
12653     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
12654       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
12655       if (PrevST->getBasePtr() == Ptr &&
12656           PrevST->getValue().getValueType() == N->getValueType(0))
12657         return CombineTo(N, PrevST->getOperand(1), Chain);
12658     }
12659   }
12660
12661   // Try to infer better alignment information than the load already has.
12662   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
12663     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12664       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
12665         SDValue NewLoad = DAG.getExtLoad(
12666             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
12667             LD->getPointerInfo(), LD->getMemoryVT(), Align,
12668             LD->getMemOperand()->getFlags(), LD->getAAInfo());
12669         // NewLoad will always be N as we are only refining the alignment
12670         assert(NewLoad.getNode() == N);
12671         (void)NewLoad;
12672       }
12673     }
12674   }
12675
12676   if (LD->isUnindexed()) {
12677     // Walk up chain skipping non-aliasing memory nodes.
12678     SDValue BetterChain = FindBetterChain(N, Chain);
12679
12680     // If there is a better chain.
12681     if (Chain != BetterChain) {
12682       SDValue ReplLoad;
12683
12684       // Replace the chain to void dependency.
12685       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
12686         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
12687                                BetterChain, Ptr, LD->getMemOperand());
12688       } else {
12689         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
12690                                   LD->getValueType(0),
12691                                   BetterChain, Ptr, LD->getMemoryVT(),
12692                                   LD->getMemOperand());
12693       }
12694
12695       // Create token factor to keep old chain connected.
12696       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
12697                                   MVT::Other, Chain, ReplLoad.getValue(1));
12698
12699       // Replace uses with load result and token factor
12700       return CombineTo(N, ReplLoad.getValue(0), Token);
12701     }
12702   }
12703
12704   // Try transforming N to an indexed load.
12705   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12706     return SDValue(N, 0);
12707
12708   // Try to slice up N to more direct loads if the slices are mapped to
12709   // different register banks or pairing can take place.
12710   if (SliceUpLoad(N))
12711     return SDValue(N, 0);
12712
12713   return SDValue();
12714 }
12715
12716 namespace {
12717
12718 /// Helper structure used to slice a load in smaller loads.
12719 /// Basically a slice is obtained from the following sequence:
12720 /// Origin = load Ty1, Base
12721 /// Shift = srl Ty1 Origin, CstTy Amount
12722 /// Inst = trunc Shift to Ty2
12723 ///
12724 /// Then, it will be rewritten into:
12725 /// Slice = load SliceTy, Base + SliceOffset
12726 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
12727 ///
12728 /// SliceTy is deduced from the number of bits that are actually used to
12729 /// build Inst.
12730 struct LoadedSlice {
12731   /// Helper structure used to compute the cost of a slice.
12732   struct Cost {
12733     /// Are we optimizing for code size.
12734     bool ForCodeSize;
12735
12736     /// Various cost.
12737     unsigned Loads = 0;
12738     unsigned Truncates = 0;
12739     unsigned CrossRegisterBanksCopies = 0;
12740     unsigned ZExts = 0;
12741     unsigned Shift = 0;
12742
12743     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
12744
12745     /// Get the cost of one isolated slice.
12746     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
12747         : ForCodeSize(ForCodeSize), Loads(1) {
12748       EVT TruncType = LS.Inst->getValueType(0);
12749       EVT LoadedType = LS.getLoadedType();
12750       if (TruncType != LoadedType &&
12751           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
12752         ZExts = 1;
12753     }
12754
12755     /// Account for slicing gain in the current cost.
12756     /// Slicing provide a few gains like removing a shift or a
12757     /// truncate. This method allows to grow the cost of the original
12758     /// load with the gain from this slice.
12759     void addSliceGain(const LoadedSlice &LS) {
12760       // Each slice saves a truncate.
12761       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
12762       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
12763                               LS.Inst->getValueType(0)))
12764         ++Truncates;
12765       // If there is a shift amount, this slice gets rid of it.
12766       if (LS.Shift)
12767         ++Shift;
12768       // If this slice can merge a cross register bank copy, account for it.
12769       if (LS.canMergeExpensiveCrossRegisterBankCopy())
12770         ++CrossRegisterBanksCopies;
12771     }
12772
12773     Cost &operator+=(const Cost &RHS) {
12774       Loads += RHS.Loads;
12775       Truncates += RHS.Truncates;
12776       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
12777       ZExts += RHS.ZExts;
12778       Shift += RHS.Shift;
12779       return *this;
12780     }
12781
12782     bool operator==(const Cost &RHS) const {
12783       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
12784              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
12785              ZExts == RHS.ZExts && Shift == RHS.Shift;
12786     }
12787
12788     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
12789
12790     bool operator<(const Cost &RHS) const {
12791       // Assume cross register banks copies are as expensive as loads.
12792       // FIXME: Do we want some more target hooks?
12793       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
12794       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
12795       // Unless we are optimizing for code size, consider the
12796       // expensive operation first.
12797       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
12798         return ExpensiveOpsLHS < ExpensiveOpsRHS;
12799       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
12800              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
12801     }
12802
12803     bool operator>(const Cost &RHS) const { return RHS < *this; }
12804
12805     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
12806
12807     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
12808   };
12809
12810   // The last instruction that represent the slice. This should be a
12811   // truncate instruction.
12812   SDNode *Inst;
12813
12814   // The original load instruction.
12815   LoadSDNode *Origin;
12816
12817   // The right shift amount in bits from the original load.
12818   unsigned Shift;
12819
12820   // The DAG from which Origin came from.
12821   // This is used to get some contextual information about legal types, etc.
12822   SelectionDAG *DAG;
12823
12824   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
12825               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
12826       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
12827
12828   /// Get the bits used in a chunk of bits \p BitWidth large.
12829   /// \return Result is \p BitWidth and has used bits set to 1 and
12830   ///         not used bits set to 0.
12831   APInt getUsedBits() const {
12832     // Reproduce the trunc(lshr) sequence:
12833     // - Start from the truncated value.
12834     // - Zero extend to the desired bit width.
12835     // - Shift left.
12836     assert(Origin && "No original load to compare against.");
12837     unsigned BitWidth = Origin->getValueSizeInBits(0);
12838     assert(Inst && "This slice is not bound to an instruction");
12839     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
12840            "Extracted slice is bigger than the whole type!");
12841     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
12842     UsedBits.setAllBits();
12843     UsedBits = UsedBits.zext(BitWidth);
12844     UsedBits <<= Shift;
12845     return UsedBits;
12846   }
12847
12848   /// Get the size of the slice to be loaded in bytes.
12849   unsigned getLoadedSize() const {
12850     unsigned SliceSize = getUsedBits().countPopulation();
12851     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
12852     return SliceSize / 8;
12853   }
12854
12855   /// Get the type that will be loaded for this slice.
12856   /// Note: This may not be the final type for the slice.
12857   EVT getLoadedType() const {
12858     assert(DAG && "Missing context");
12859     LLVMContext &Ctxt = *DAG->getContext();
12860     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
12861   }
12862
12863   /// Get the alignment of the load used for this slice.
12864   unsigned getAlignment() const {
12865     unsigned Alignment = Origin->getAlignment();
12866     unsigned Offset = getOffsetFromBase();
12867     if (Offset != 0)
12868       Alignment = MinAlign(Alignment, Alignment + Offset);
12869     return Alignment;
12870   }
12871
12872   /// Check if this slice can be rewritten with legal operations.
12873   bool isLegal() const {
12874     // An invalid slice is not legal.
12875     if (!Origin || !Inst || !DAG)
12876       return false;
12877
12878     // Offsets are for indexed load only, we do not handle that.
12879     if (!Origin->getOffset().isUndef())
12880       return false;
12881
12882     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12883
12884     // Check that the type is legal.
12885     EVT SliceType = getLoadedType();
12886     if (!TLI.isTypeLegal(SliceType))
12887       return false;
12888
12889     // Check that the load is legal for this type.
12890     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
12891       return false;
12892
12893     // Check that the offset can be computed.
12894     // 1. Check its type.
12895     EVT PtrType = Origin->getBasePtr().getValueType();
12896     if (PtrType == MVT::Untyped || PtrType.isExtended())
12897       return false;
12898
12899     // 2. Check that it fits in the immediate.
12900     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
12901       return false;
12902
12903     // 3. Check that the computation is legal.
12904     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
12905       return false;
12906
12907     // Check that the zext is legal if it needs one.
12908     EVT TruncateType = Inst->getValueType(0);
12909     if (TruncateType != SliceType &&
12910         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
12911       return false;
12912
12913     return true;
12914   }
12915
12916   /// Get the offset in bytes of this slice in the original chunk of
12917   /// bits.
12918   /// \pre DAG != nullptr.
12919   uint64_t getOffsetFromBase() const {
12920     assert(DAG && "Missing context.");
12921     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
12922     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
12923     uint64_t Offset = Shift / 8;
12924     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
12925     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
12926            "The size of the original loaded type is not a multiple of a"
12927            " byte.");
12928     // If Offset is bigger than TySizeInBytes, it means we are loading all
12929     // zeros. This should have been optimized before in the process.
12930     assert(TySizeInBytes > Offset &&
12931            "Invalid shift amount for given loaded size");
12932     if (IsBigEndian)
12933       Offset = TySizeInBytes - Offset - getLoadedSize();
12934     return Offset;
12935   }
12936
12937   /// Generate the sequence of instructions to load the slice
12938   /// represented by this object and redirect the uses of this slice to
12939   /// this new sequence of instructions.
12940   /// \pre this->Inst && this->Origin are valid Instructions and this
12941   /// object passed the legal check: LoadedSlice::isLegal returned true.
12942   /// \return The last instruction of the sequence used to load the slice.
12943   SDValue loadSlice() const {
12944     assert(Inst && Origin && "Unable to replace a non-existing slice.");
12945     const SDValue &OldBaseAddr = Origin->getBasePtr();
12946     SDValue BaseAddr = OldBaseAddr;
12947     // Get the offset in that chunk of bytes w.r.t. the endianness.
12948     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
12949     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
12950     if (Offset) {
12951       // BaseAddr = BaseAddr + Offset.
12952       EVT ArithType = BaseAddr.getValueType();
12953       SDLoc DL(Origin);
12954       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
12955                               DAG->getConstant(Offset, DL, ArithType));
12956     }
12957
12958     // Create the type of the loaded slice according to its size.
12959     EVT SliceType = getLoadedType();
12960
12961     // Create the load for the slice.
12962     SDValue LastInst =
12963         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
12964                      Origin->getPointerInfo().getWithOffset(Offset),
12965                      getAlignment(), Origin->getMemOperand()->getFlags());
12966     // If the final type is not the same as the loaded type, this means that
12967     // we have to pad with zero. Create a zero extend for that.
12968     EVT FinalType = Inst->getValueType(0);
12969     if (SliceType != FinalType)
12970       LastInst =
12971           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
12972     return LastInst;
12973   }
12974
12975   /// Check if this slice can be merged with an expensive cross register
12976   /// bank copy. E.g.,
12977   /// i = load i32
12978   /// f = bitcast i32 i to float
12979   bool canMergeExpensiveCrossRegisterBankCopy() const {
12980     if (!Inst || !Inst->hasOneUse())
12981       return false;
12982     SDNode *Use = *Inst->use_begin();
12983     if (Use->getOpcode() != ISD::BITCAST)
12984       return false;
12985     assert(DAG && "Missing context");
12986     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12987     EVT ResVT = Use->getValueType(0);
12988     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
12989     const TargetRegisterClass *ArgRC =
12990         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
12991     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
12992       return false;
12993
12994     // At this point, we know that we perform a cross-register-bank copy.
12995     // Check if it is expensive.
12996     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
12997     // Assume bitcasts are cheap, unless both register classes do not
12998     // explicitly share a common sub class.
12999     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13000       return false;
13001
13002     // Check if it will be merged with the load.
13003     // 1. Check the alignment constraint.
13004     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13005         ResVT.getTypeForEVT(*DAG->getContext()));
13006
13007     if (RequiredAlignment > getAlignment())
13008       return false;
13009
13010     // 2. Check that the load is a legal operation for that type.
13011     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13012       return false;
13013
13014     // 3. Check that we do not have a zext in the way.
13015     if (Inst->getValueType(0) != getLoadedType())
13016       return false;
13017
13018     return true;
13019   }
13020 };
13021
13022 } // end anonymous namespace
13023
13024 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13025 /// \p UsedBits looks like 0..0 1..1 0..0.
13026 static bool areUsedBitsDense(const APInt &UsedBits) {
13027   // If all the bits are one, this is dense!
13028   if (UsedBits.isAllOnesValue())
13029     return true;
13030
13031   // Get rid of the unused bits on the right.
13032   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13033   // Get rid of the unused bits on the left.
13034   if (NarrowedUsedBits.countLeadingZeros())
13035     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13036   // Check that the chunk of bits is completely used.
13037   return NarrowedUsedBits.isAllOnesValue();
13038 }
13039
13040 /// Check whether or not \p First and \p Second are next to each other
13041 /// in memory. This means that there is no hole between the bits loaded
13042 /// by \p First and the bits loaded by \p Second.
13043 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13044                                      const LoadedSlice &Second) {
13045   assert(First.Origin == Second.Origin && First.Origin &&
13046          "Unable to match different memory origins.");
13047   APInt UsedBits = First.getUsedBits();
13048   assert((UsedBits & Second.getUsedBits()) == 0 &&
13049          "Slices are not supposed to overlap.");
13050   UsedBits |= Second.getUsedBits();
13051   return areUsedBitsDense(UsedBits);
13052 }
13053
13054 /// Adjust the \p GlobalLSCost according to the target
13055 /// paring capabilities and the layout of the slices.
13056 /// \pre \p GlobalLSCost should account for at least as many loads as
13057 /// there is in the slices in \p LoadedSlices.
13058 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13059                                  LoadedSlice::Cost &GlobalLSCost) {
13060   unsigned NumberOfSlices = LoadedSlices.size();
13061   // If there is less than 2 elements, no pairing is possible.
13062   if (NumberOfSlices < 2)
13063     return;
13064
13065   // Sort the slices so that elements that are likely to be next to each
13066   // other in memory are next to each other in the list.
13067   llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
13068              [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13069     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13070     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13071   });
13072   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13073   // First (resp. Second) is the first (resp. Second) potentially candidate
13074   // to be placed in a paired load.
13075   const LoadedSlice *First = nullptr;
13076   const LoadedSlice *Second = nullptr;
13077   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13078                 // Set the beginning of the pair.
13079                                                            First = Second) {
13080     Second = &LoadedSlices[CurrSlice];
13081
13082     // If First is NULL, it means we start a new pair.
13083     // Get to the next slice.
13084     if (!First)
13085       continue;
13086
13087     EVT LoadedType = First->getLoadedType();
13088
13089     // If the types of the slices are different, we cannot pair them.
13090     if (LoadedType != Second->getLoadedType())
13091       continue;
13092
13093     // Check if the target supplies paired loads for this type.
13094     unsigned RequiredAlignment = 0;
13095     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13096       // move to the next pair, this type is hopeless.
13097       Second = nullptr;
13098       continue;
13099     }
13100     // Check if we meet the alignment requirement.
13101     if (RequiredAlignment > First->getAlignment())
13102       continue;
13103
13104     // Check that both loads are next to each other in memory.
13105     if (!areSlicesNextToEachOther(*First, *Second))
13106       continue;
13107
13108     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13109     --GlobalLSCost.Loads;
13110     // Move to the next pair.
13111     Second = nullptr;
13112   }
13113 }
13114
13115 /// Check the profitability of all involved LoadedSlice.
13116 /// Currently, it is considered profitable if there is exactly two
13117 /// involved slices (1) which are (2) next to each other in memory, and
13118 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13119 ///
13120 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13121 /// the elements themselves.
13122 ///
13123 /// FIXME: When the cost model will be mature enough, we can relax
13124 /// constraints (1) and (2).
13125 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13126                                 const APInt &UsedBits, bool ForCodeSize) {
13127   unsigned NumberOfSlices = LoadedSlices.size();
13128   if (StressLoadSlicing)
13129     return NumberOfSlices > 1;
13130
13131   // Check (1).
13132   if (NumberOfSlices != 2)
13133     return false;
13134
13135   // Check (2).
13136   if (!areUsedBitsDense(UsedBits))
13137     return false;
13138
13139   // Check (3).
13140   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13141   // The original code has one big load.
13142   OrigCost.Loads = 1;
13143   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13144     const LoadedSlice &LS = LoadedSlices[CurrSlice];
13145     // Accumulate the cost of all the slices.
13146     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13147     GlobalSlicingCost += SliceCost;
13148
13149     // Account as cost in the original configuration the gain obtained
13150     // with the current slices.
13151     OrigCost.addSliceGain(LS);
13152   }
13153
13154   // If the target supports paired load, adjust the cost accordingly.
13155   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13156   return OrigCost > GlobalSlicingCost;
13157 }
13158
13159 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13160 /// operations, split it in the various pieces being extracted.
13161 ///
13162 /// This sort of thing is introduced by SROA.
13163 /// This slicing takes care not to insert overlapping loads.
13164 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13165 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13166   if (Level < AfterLegalizeDAG)
13167     return false;
13168
13169   LoadSDNode *LD = cast<LoadSDNode>(N);
13170   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13171       !LD->getValueType(0).isInteger())
13172     return false;
13173
13174   // Keep track of already used bits to detect overlapping values.
13175   // In that case, we will just abort the transformation.
13176   APInt UsedBits(LD->getValueSizeInBits(0), 0);
13177
13178   SmallVector<LoadedSlice, 4> LoadedSlices;
13179
13180   // Check if this load is used as several smaller chunks of bits.
13181   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13182   // of computation for each trunc.
13183   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13184        UI != UIEnd; ++UI) {
13185     // Skip the uses of the chain.
13186     if (UI.getUse().getResNo() != 0)
13187       continue;
13188
13189     SDNode *User = *UI;
13190     unsigned Shift = 0;
13191
13192     // Check if this is a trunc(lshr).
13193     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13194         isa<ConstantSDNode>(User->getOperand(1))) {
13195       Shift = User->getConstantOperandVal(1);
13196       User = *User->use_begin();
13197     }
13198
13199     // At this point, User is a Truncate, iff we encountered, trunc or
13200     // trunc(lshr).
13201     if (User->getOpcode() != ISD::TRUNCATE)
13202       return false;
13203
13204     // The width of the type must be a power of 2 and greater than 8-bits.
13205     // Otherwise the load cannot be represented in LLVM IR.
13206     // Moreover, if we shifted with a non-8-bits multiple, the slice
13207     // will be across several bytes. We do not support that.
13208     unsigned Width = User->getValueSizeInBits(0);
13209     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13210       return false;
13211
13212     // Build the slice for this chain of computations.
13213     LoadedSlice LS(User, LD, Shift, &DAG);
13214     APInt CurrentUsedBits = LS.getUsedBits();
13215
13216     // Check if this slice overlaps with another.
13217     if ((CurrentUsedBits & UsedBits) != 0)
13218       return false;
13219     // Update the bits used globally.
13220     UsedBits |= CurrentUsedBits;
13221
13222     // Check if the new slice would be legal.
13223     if (!LS.isLegal())
13224       return false;
13225
13226     // Record the slice.
13227     LoadedSlices.push_back(LS);
13228   }
13229
13230   // Abort slicing if it does not seem to be profitable.
13231   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13232     return false;
13233
13234   ++SlicedLoads;
13235
13236   // Rewrite each chain to use an independent load.
13237   // By construction, each chain can be represented by a unique load.
13238
13239   // Prepare the argument for the new token factor for all the slices.
13240   SmallVector<SDValue, 8> ArgChains;
13241   for (SmallVectorImpl<LoadedSlice>::const_iterator
13242            LSIt = LoadedSlices.begin(),
13243            LSItEnd = LoadedSlices.end();
13244        LSIt != LSItEnd; ++LSIt) {
13245     SDValue SliceInst = LSIt->loadSlice();
13246     CombineTo(LSIt->Inst, SliceInst, true);
13247     if (SliceInst.getOpcode() != ISD::LOAD)
13248       SliceInst = SliceInst.getOperand(0);
13249     assert(SliceInst->getOpcode() == ISD::LOAD &&
13250            "It takes more than a zext to get to the loaded slice!!");
13251     ArgChains.push_back(SliceInst.getValue(1));
13252   }
13253
13254   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13255                               ArgChains);
13256   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13257   AddToWorklist(Chain.getNode());
13258   return true;
13259 }
13260
13261 /// Check to see if V is (and load (ptr), imm), where the load is having
13262 /// specific bytes cleared out.  If so, return the byte size being masked out
13263 /// and the shift amount.
13264 static std::pair<unsigned, unsigned>
13265 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13266   std::pair<unsigned, unsigned> Result(0, 0);
13267
13268   // Check for the structure we're looking for.
13269   if (V->getOpcode() != ISD::AND ||
13270       !isa<ConstantSDNode>(V->getOperand(1)) ||
13271       !ISD::isNormalLoad(V->getOperand(0).getNode()))
13272     return Result;
13273
13274   // Check the chain and pointer.
13275   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13276   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
13277
13278   // This only handles simple types.
13279   if (V.getValueType() != MVT::i16 &&
13280       V.getValueType() != MVT::i32 &&
13281       V.getValueType() != MVT::i64)
13282     return Result;
13283
13284   // Check the constant mask.  Invert it so that the bits being masked out are
13285   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
13286   // follow the sign bit for uniformity.
13287   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13288   unsigned NotMaskLZ = countLeadingZeros(NotMask);
13289   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
13290   unsigned NotMaskTZ = countTrailingZeros(NotMask);
13291   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
13292   if (NotMaskLZ == 64) return Result;  // All zero mask.
13293
13294   // See if we have a continuous run of bits.  If so, we have 0*1+0*
13295   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13296     return Result;
13297
13298   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13299   if (V.getValueType() != MVT::i64 && NotMaskLZ)
13300     NotMaskLZ -= 64-V.getValueSizeInBits();
13301
13302   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13303   switch (MaskedBytes) {
13304   case 1:
13305   case 2:
13306   case 4: break;
13307   default: return Result; // All one mask, or 5-byte mask.
13308   }
13309
13310   // Verify that the first bit starts at a multiple of mask so that the access
13311   // is aligned the same as the access width.
13312   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13313
13314   // For narrowing to be valid, it must be the case that the load the
13315   // immediately preceeding memory operation before the store.
13316   if (LD == Chain.getNode())
13317     ; // ok.
13318   else if (Chain->getOpcode() == ISD::TokenFactor &&
13319            SDValue(LD, 1).hasOneUse()) {
13320     // LD has only 1 chain use so they are no indirect dependencies.
13321     bool isOk = false;
13322     for (const SDValue &ChainOp : Chain->op_values())
13323       if (ChainOp.getNode() == LD) {
13324         isOk = true;
13325         break;
13326       }
13327     if (!isOk)
13328       return Result;
13329   } else
13330     return Result; // Fail.
13331
13332   Result.first = MaskedBytes;
13333   Result.second = NotMaskTZ/8;
13334   return Result;
13335 }
13336
13337 /// Check to see if IVal is something that provides a value as specified by
13338 /// MaskInfo. If so, replace the specified store with a narrower store of
13339 /// truncated IVal.
13340 static SDNode *
13341 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
13342                                 SDValue IVal, StoreSDNode *St,
13343                                 DAGCombiner *DC) {
13344   unsigned NumBytes = MaskInfo.first;
13345   unsigned ByteShift = MaskInfo.second;
13346   SelectionDAG &DAG = DC->getDAG();
13347
13348   // Check to see if IVal is all zeros in the part being masked in by the 'or'
13349   // that uses this.  If not, this is not a replacement.
13350   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
13351                                   ByteShift*8, (ByteShift+NumBytes)*8);
13352   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
13353
13354   // Check that it is legal on the target to do this.  It is legal if the new
13355   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
13356   // legalization.
13357   MVT VT = MVT::getIntegerVT(NumBytes*8);
13358   if (!DC->isTypeLegal(VT))
13359     return nullptr;
13360
13361   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
13362   // shifted by ByteShift and truncated down to NumBytes.
13363   if (ByteShift) {
13364     SDLoc DL(IVal);
13365     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
13366                        DAG.getConstant(ByteShift*8, DL,
13367                                     DC->getShiftAmountTy(IVal.getValueType())));
13368   }
13369
13370   // Figure out the offset for the store and the alignment of the access.
13371   unsigned StOffset;
13372   unsigned NewAlign = St->getAlignment();
13373
13374   if (DAG.getDataLayout().isLittleEndian())
13375     StOffset = ByteShift;
13376   else
13377     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
13378
13379   SDValue Ptr = St->getBasePtr();
13380   if (StOffset) {
13381     SDLoc DL(IVal);
13382     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
13383                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13384     NewAlign = MinAlign(NewAlign, StOffset);
13385   }
13386
13387   // Truncate down to the new size.
13388   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13389
13390   ++OpsNarrowed;
13391   return DAG
13392       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13393                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13394       .getNode();
13395 }
13396
13397 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
13398 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
13399 /// narrowing the load and store if it would end up being a win for performance
13400 /// or code size.
13401 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
13402   StoreSDNode *ST  = cast<StoreSDNode>(N);
13403   if (ST->isVolatile())
13404     return SDValue();
13405
13406   SDValue Chain = ST->getChain();
13407   SDValue Value = ST->getValue();
13408   SDValue Ptr   = ST->getBasePtr();
13409   EVT VT = Value.getValueType();
13410
13411   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
13412     return SDValue();
13413
13414   unsigned Opc = Value.getOpcode();
13415
13416   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
13417   // is a byte mask indicating a consecutive number of bytes, check to see if
13418   // Y is known to provide just those bytes.  If so, we try to replace the
13419   // load + replace + store sequence with a single (narrower) store, which makes
13420   // the load dead.
13421   if (Opc == ISD::OR) {
13422     std::pair<unsigned, unsigned> MaskedLoad;
13423     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
13424     if (MaskedLoad.first)
13425       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13426                                                   Value.getOperand(1), ST,this))
13427         return SDValue(NewST, 0);
13428
13429     // Or is commutative, so try swapping X and Y.
13430     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
13431     if (MaskedLoad.first)
13432       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13433                                                   Value.getOperand(0), ST,this))
13434         return SDValue(NewST, 0);
13435   }
13436
13437   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
13438       Value.getOperand(1).getOpcode() != ISD::Constant)
13439     return SDValue();
13440
13441   SDValue N0 = Value.getOperand(0);
13442   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13443       Chain == SDValue(N0.getNode(), 1)) {
13444     LoadSDNode *LD = cast<LoadSDNode>(N0);
13445     if (LD->getBasePtr() != Ptr ||
13446         LD->getPointerInfo().getAddrSpace() !=
13447         ST->getPointerInfo().getAddrSpace())
13448       return SDValue();
13449
13450     // Find the type to narrow it the load / op / store to.
13451     SDValue N1 = Value.getOperand(1);
13452     unsigned BitWidth = N1.getValueSizeInBits();
13453     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
13454     if (Opc == ISD::AND)
13455       Imm ^= APInt::getAllOnesValue(BitWidth);
13456     if (Imm == 0 || Imm.isAllOnesValue())
13457       return SDValue();
13458     unsigned ShAmt = Imm.countTrailingZeros();
13459     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
13460     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
13461     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13462     // The narrowing should be profitable, the load/store operation should be
13463     // legal (or custom) and the store size should be equal to the NewVT width.
13464     while (NewBW < BitWidth &&
13465            (NewVT.getStoreSizeInBits() != NewBW ||
13466             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
13467             !TLI.isNarrowingProfitable(VT, NewVT))) {
13468       NewBW = NextPowerOf2(NewBW);
13469       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13470     }
13471     if (NewBW >= BitWidth)
13472       return SDValue();
13473
13474     // If the lsb changed does not start at the type bitwidth boundary,
13475     // start at the previous one.
13476     if (ShAmt % NewBW)
13477       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
13478     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
13479                                    std::min(BitWidth, ShAmt + NewBW));
13480     if ((Imm & Mask) == Imm) {
13481       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
13482       if (Opc == ISD::AND)
13483         NewImm ^= APInt::getAllOnesValue(NewBW);
13484       uint64_t PtrOff = ShAmt / 8;
13485       // For big endian targets, we need to adjust the offset to the pointer to
13486       // load the correct bytes.
13487       if (DAG.getDataLayout().isBigEndian())
13488         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
13489
13490       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
13491       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
13492       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
13493         return SDValue();
13494
13495       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
13496                                    Ptr.getValueType(), Ptr,
13497                                    DAG.getConstant(PtrOff, SDLoc(LD),
13498                                                    Ptr.getValueType()));
13499       SDValue NewLD =
13500           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
13501                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
13502                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
13503       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
13504                                    DAG.getConstant(NewImm, SDLoc(Value),
13505                                                    NewVT));
13506       SDValue NewST =
13507           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
13508                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
13509
13510       AddToWorklist(NewPtr.getNode());
13511       AddToWorklist(NewLD.getNode());
13512       AddToWorklist(NewVal.getNode());
13513       WorklistRemover DeadNodes(*this);
13514       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
13515       ++OpsNarrowed;
13516       return NewST;
13517     }
13518   }
13519
13520   return SDValue();
13521 }
13522
13523 /// For a given floating point load / store pair, if the load value isn't used
13524 /// by any other operations, then consider transforming the pair to integer
13525 /// load / store operations if the target deems the transformation profitable.
13526 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13527   StoreSDNode *ST  = cast<StoreSDNode>(N);
13528   SDValue Chain = ST->getChain();
13529   SDValue Value = ST->getValue();
13530   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13531       Value.hasOneUse() &&
13532       Chain == SDValue(Value.getNode(), 1)) {
13533     LoadSDNode *LD = cast<LoadSDNode>(Value);
13534     EVT VT = LD->getMemoryVT();
13535     if (!VT.isFloatingPoint() ||
13536         VT != ST->getMemoryVT() ||
13537         LD->isNonTemporal() ||
13538         ST->isNonTemporal() ||
13539         LD->getPointerInfo().getAddrSpace() != 0 ||
13540         ST->getPointerInfo().getAddrSpace() != 0)
13541       return SDValue();
13542
13543     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13544     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13545         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13546         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13547         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13548       return SDValue();
13549
13550     unsigned LDAlign = LD->getAlignment();
13551     unsigned STAlign = ST->getAlignment();
13552     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13553     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13554     if (LDAlign < ABIAlign || STAlign < ABIAlign)
13555       return SDValue();
13556
13557     SDValue NewLD =
13558         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13559                     LD->getPointerInfo(), LDAlign);
13560
13561     SDValue NewST =
13562         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13563                      ST->getPointerInfo(), STAlign);
13564
13565     AddToWorklist(NewLD.getNode());
13566     AddToWorklist(NewST.getNode());
13567     WorklistRemover DeadNodes(*this);
13568     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13569     ++LdStFP2Int;
13570     return NewST;
13571   }
13572
13573   return SDValue();
13574 }
13575
13576 // This is a helper function for visitMUL to check the profitability
13577 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13578 // MulNode is the original multiply, AddNode is (add x, c1),
13579 // and ConstNode is c2.
13580 //
13581 // If the (add x, c1) has multiple uses, we could increase
13582 // the number of adds if we make this transformation.
13583 // It would only be worth doing this if we can remove a
13584 // multiply in the process. Check for that here.
13585 // To illustrate:
13586 //     (A + c1) * c3
13587 //     (A + c2) * c3
13588 // We're checking for cases where we have common "c3 * A" expressions.
13589 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13590                                               SDValue &AddNode,
13591                                               SDValue &ConstNode) {
13592   APInt Val;
13593
13594   // If the add only has one use, this would be OK to do.
13595   if (AddNode.getNode()->hasOneUse())
13596     return true;
13597
13598   // Walk all the users of the constant with which we're multiplying.
13599   for (SDNode *Use : ConstNode->uses()) {
13600     if (Use == MulNode) // This use is the one we're on right now. Skip it.
13601       continue;
13602
13603     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13604       SDNode *OtherOp;
13605       SDNode *MulVar = AddNode.getOperand(0).getNode();
13606
13607       // OtherOp is what we're multiplying against the constant.
13608       if (Use->getOperand(0) == ConstNode)
13609         OtherOp = Use->getOperand(1).getNode();
13610       else
13611         OtherOp = Use->getOperand(0).getNode();
13612
13613       // Check to see if multiply is with the same operand of our "add".
13614       //
13615       //     ConstNode  = CONST
13616       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
13617       //     ...
13618       //     AddNode  = (A + c1)  <-- MulVar is A.
13619       //         = AddNode * ConstNode   <-- current visiting instruction.
13620       //
13621       // If we make this transformation, we will have a common
13622       // multiply (ConstNode * A) that we can save.
13623       if (OtherOp == MulVar)
13624         return true;
13625
13626       // Now check to see if a future expansion will give us a common
13627       // multiply.
13628       //
13629       //     ConstNode  = CONST
13630       //     AddNode    = (A + c1)
13631       //     ...   = AddNode * ConstNode <-- current visiting instruction.
13632       //     ...
13633       //     OtherOp = (A + c2)
13634       //     Use     = OtherOp * ConstNode <-- visiting Use.
13635       //
13636       // If we make this transformation, we will have a common
13637       // multiply (CONST * A) after we also do the same transformation
13638       // to the "t2" instruction.
13639       if (OtherOp->getOpcode() == ISD::ADD &&
13640           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13641           OtherOp->getOperand(0).getNode() == MulVar)
13642         return true;
13643     }
13644   }
13645
13646   // Didn't find a case where this would be profitable.
13647   return false;
13648 }
13649
13650 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13651                                          unsigned NumStores) {
13652   SmallVector<SDValue, 8> Chains;
13653   SmallPtrSet<const SDNode *, 8> Visited;
13654   SDLoc StoreDL(StoreNodes[0].MemNode);
13655
13656   for (unsigned i = 0; i < NumStores; ++i) {
13657     Visited.insert(StoreNodes[i].MemNode);
13658   }
13659
13660   // don't include nodes that are children
13661   for (unsigned i = 0; i < NumStores; ++i) {
13662     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13663       Chains.push_back(StoreNodes[i].MemNode->getChain());
13664   }
13665
13666   assert(Chains.size() > 0 && "Chain should have generated a chain");
13667   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13668 }
13669
13670 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
13671     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
13672     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
13673   // Make sure we have something to merge.
13674   if (NumStores < 2)
13675     return false;
13676
13677   // The latest Node in the DAG.
13678   SDLoc DL(StoreNodes[0].MemNode);
13679
13680   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
13681   unsigned SizeInBits = NumStores * ElementSizeBits;
13682   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13683
13684   EVT StoreTy;
13685   if (UseVector) {
13686     unsigned Elts = NumStores * NumMemElts;
13687     // Get the type for the merged vector store.
13688     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13689   } else
13690     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
13691
13692   SDValue StoredVal;
13693   if (UseVector) {
13694     if (IsConstantSrc) {
13695       SmallVector<SDValue, 8> BuildVector;
13696       for (unsigned I = 0; I != NumStores; ++I) {
13697         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
13698         SDValue Val = St->getValue();
13699         // If constant is of the wrong type, convert it now.
13700         if (MemVT != Val.getValueType()) {
13701           Val = peekThroughBitcast(Val);
13702           // Deal with constants of wrong size.
13703           if (ElementSizeBits != Val.getValueSizeInBits()) {
13704             EVT IntMemVT =
13705                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
13706             if (isa<ConstantFPSDNode>(Val)) {
13707               // Not clear how to truncate FP values.
13708               return false;
13709             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
13710               Val = DAG.getConstant(C->getAPIntValue()
13711                                         .zextOrTrunc(Val.getValueSizeInBits())
13712                                         .zextOrTrunc(ElementSizeBits),
13713                                     SDLoc(C), IntMemVT);
13714           }
13715           // Make sure correctly size type is the correct type.
13716           Val = DAG.getBitcast(MemVT, Val);
13717         }
13718         BuildVector.push_back(Val);
13719       }
13720       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13721                                                : ISD::BUILD_VECTOR,
13722                               DL, StoreTy, BuildVector);
13723     } else {
13724       SmallVector<SDValue, 8> Ops;
13725       for (unsigned i = 0; i < NumStores; ++i) {
13726         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13727         SDValue Val = peekThroughBitcast(St->getValue());
13728         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
13729         // type MemVT. If the underlying value is not the correct
13730         // type, but it is an extraction of an appropriate vector we
13731         // can recast Val to be of the correct type. This may require
13732         // converting between EXTRACT_VECTOR_ELT and
13733         // EXTRACT_SUBVECTOR.
13734         if ((MemVT != Val.getValueType()) &&
13735             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13736              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
13737           SDValue Vec = Val.getOperand(0);
13738           EVT MemVTScalarTy = MemVT.getScalarType();
13739           // We may need to add a bitcast here to get types to line up.
13740           if (MemVTScalarTy != Vec.getValueType()) {
13741             unsigned Elts = Vec.getValueType().getSizeInBits() /
13742                             MemVTScalarTy.getSizeInBits();
13743             EVT NewVecTy =
13744                 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
13745             Vec = DAG.getBitcast(NewVecTy, Vec);
13746           }
13747           auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
13748                                         : ISD::EXTRACT_VECTOR_ELT;
13749           Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
13750         }
13751         Ops.push_back(Val);
13752       }
13753
13754       // Build the extracted vector elements back into a vector.
13755       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13756                                                : ISD::BUILD_VECTOR,
13757                               DL, StoreTy, Ops);
13758     }
13759   } else {
13760     // We should always use a vector store when merging extracted vector
13761     // elements, so this path implies a store of constants.
13762     assert(IsConstantSrc && "Merged vector elements should use vector store");
13763
13764     APInt StoreInt(SizeInBits, 0);
13765
13766     // Construct a single integer constant which is made of the smaller
13767     // constant inputs.
13768     bool IsLE = DAG.getDataLayout().isLittleEndian();
13769     for (unsigned i = 0; i < NumStores; ++i) {
13770       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
13771       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
13772
13773       SDValue Val = St->getValue();
13774       Val = peekThroughBitcast(Val);
13775       StoreInt <<= ElementSizeBits;
13776       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
13777         StoreInt |= C->getAPIntValue()
13778                         .zextOrTrunc(ElementSizeBits)
13779                         .zextOrTrunc(SizeInBits);
13780       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
13781         StoreInt |= C->getValueAPF()
13782                         .bitcastToAPInt()
13783                         .zextOrTrunc(ElementSizeBits)
13784                         .zextOrTrunc(SizeInBits);
13785         // If fp truncation is necessary give up for now.
13786         if (MemVT.getSizeInBits() != ElementSizeBits)
13787           return false;
13788       } else {
13789         llvm_unreachable("Invalid constant element type");
13790       }
13791     }
13792
13793     // Create the new Load and Store operations.
13794     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
13795   }
13796
13797   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13798   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
13799
13800   // make sure we use trunc store if it's necessary to be legal.
13801   SDValue NewStore;
13802   if (!UseTrunc) {
13803     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
13804                             FirstInChain->getPointerInfo(),
13805                             FirstInChain->getAlignment());
13806   } else { // Must be realized as a trunc store
13807     EVT LegalizedStoredValTy =
13808         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
13809     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
13810     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
13811     SDValue ExtendedStoreVal =
13812         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
13813                         LegalizedStoredValTy);
13814     NewStore = DAG.getTruncStore(
13815         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
13816         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
13817         FirstInChain->getAlignment(),
13818         FirstInChain->getMemOperand()->getFlags());
13819   }
13820
13821   // Replace all merged stores with the new store.
13822   for (unsigned i = 0; i < NumStores; ++i)
13823     CombineTo(StoreNodes[i].MemNode, NewStore);
13824
13825   AddToWorklist(NewChain.getNode());
13826   return true;
13827 }
13828
13829 void DAGCombiner::getStoreMergeCandidates(
13830     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
13831     SDNode *&RootNode) {
13832   // This holds the base pointer, index, and the offset in bytes from the base
13833   // pointer.
13834   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
13835   EVT MemVT = St->getMemoryVT();
13836
13837   SDValue Val = peekThroughBitcast(St->getValue());
13838   // We must have a base and an offset.
13839   if (!BasePtr.getBase().getNode())
13840     return;
13841
13842   // Do not handle stores to undef base pointers.
13843   if (BasePtr.getBase().isUndef())
13844     return;
13845
13846   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
13847   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13848                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
13849   bool IsLoadSrc = isa<LoadSDNode>(Val);
13850   BaseIndexOffset LBasePtr;
13851   // Match on loadbaseptr if relevant.
13852   EVT LoadVT;
13853   if (IsLoadSrc) {
13854     auto *Ld = cast<LoadSDNode>(Val);
13855     LBasePtr = BaseIndexOffset::match(Ld, DAG);
13856     LoadVT = Ld->getMemoryVT();
13857     // Load and store should be the same type.
13858     if (MemVT != LoadVT)
13859       return;
13860     // Loads must only have one use.
13861     if (!Ld->hasNUsesOfValue(1, 0))
13862       return;
13863     // The memory operands must not be volatile.
13864     if (Ld->isVolatile() || Ld->isIndexed())
13865       return;
13866   }
13867   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
13868                             int64_t &Offset) -> bool {
13869     if (Other->isVolatile() || Other->isIndexed())
13870       return false;
13871     SDValue Val = peekThroughBitcast(Other->getValue());
13872     // Allow merging constants of different types as integers.
13873     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
13874                                            : Other->getMemoryVT() != MemVT;
13875     if (IsLoadSrc) {
13876       if (NoTypeMatch)
13877         return false;
13878       // The Load's Base Ptr must also match
13879       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
13880         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
13881         if (LoadVT != OtherLd->getMemoryVT())
13882           return false;
13883         // Loads must only have one use.
13884         if (!OtherLd->hasNUsesOfValue(1, 0))
13885           return false;
13886         // The memory operands must not be volatile.
13887         if (OtherLd->isVolatile() || OtherLd->isIndexed())
13888           return false;
13889         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
13890           return false;
13891       } else
13892         return false;
13893     }
13894     if (IsConstantSrc) {
13895       if (NoTypeMatch)
13896         return false;
13897       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
13898         return false;
13899     }
13900     if (IsExtractVecSrc) {
13901       // Do not merge truncated stores here.
13902       if (Other->isTruncatingStore())
13903         return false;
13904       if (!MemVT.bitsEq(Val.getValueType()))
13905         return false;
13906       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13907           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13908         return false;
13909     }
13910     Ptr = BaseIndexOffset::match(Other, DAG);
13911     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
13912   };
13913
13914   // We looking for a root node which is an ancestor to all mergable
13915   // stores. We search up through a load, to our root and then down
13916   // through all children. For instance we will find Store{1,2,3} if
13917   // St is Store1, Store2. or Store3 where the root is not a load
13918   // which always true for nonvolatile ops. TODO: Expand
13919   // the search to find all valid candidates through multiple layers of loads.
13920   //
13921   // Root
13922   // |-------|-------|
13923   // Load    Load    Store3
13924   // |       |
13925   // Store1   Store2
13926   //
13927   // FIXME: We should be able to climb and
13928   // descend TokenFactors to find candidates as well.
13929
13930   RootNode = St->getChain().getNode();
13931
13932   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
13933     RootNode = Ldn->getChain().getNode();
13934     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13935       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
13936         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
13937           if (I2.getOperandNo() == 0)
13938             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
13939               BaseIndexOffset Ptr;
13940               int64_t PtrDiff;
13941               if (CandidateMatch(OtherST, Ptr, PtrDiff))
13942                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13943             }
13944   } else
13945     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13946       if (I.getOperandNo() == 0)
13947         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
13948           BaseIndexOffset Ptr;
13949           int64_t PtrDiff;
13950           if (CandidateMatch(OtherST, Ptr, PtrDiff))
13951             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13952         }
13953 }
13954
13955 // We need to check that merging these stores does not cause a loop in
13956 // the DAG. Any store candidate may depend on another candidate
13957 // indirectly through its operand (we already consider dependencies
13958 // through the chain). Check in parallel by searching up from
13959 // non-chain operands of candidates.
13960 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13961     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
13962     SDNode *RootNode) {
13963   // FIXME: We should be able to truncate a full search of
13964   // predecessors by doing a BFS and keeping tabs the originating
13965   // stores from which worklist nodes come from in a similar way to
13966   // TokenFactor simplfication.
13967
13968   SmallPtrSet<const SDNode *, 32> Visited;
13969   SmallVector<const SDNode *, 8> Worklist;
13970
13971   // RootNode is a predecessor to all candidates so we need not search
13972   // past it. Add RootNode (peeking through TokenFactors). Do not count
13973   // these towards size check.
13974
13975   Worklist.push_back(RootNode);
13976   while (!Worklist.empty()) {
13977     auto N = Worklist.pop_back_val();
13978     if (!Visited.insert(N).second)
13979       continue; // Already present in Visited.
13980     if (N->getOpcode() == ISD::TokenFactor) {
13981       for (SDValue Op : N->ops())
13982         Worklist.push_back(Op.getNode());
13983     }
13984   }
13985
13986   // Don't count pruning nodes towards max.
13987   unsigned int Max = 1024 + Visited.size();
13988   // Search Ops of store candidates.
13989   for (unsigned i = 0; i < NumStores; ++i) {
13990     SDNode *N = StoreNodes[i].MemNode;
13991     // Of the 4 Store Operands:
13992     //   * Chain (Op 0) -> We have already considered these
13993     //                    in candidate selection and can be
13994     //                    safely ignored
13995     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
13996     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant
13997     //                      and so no cycles are possible.
13998     //   * (Op 3) -> appears to always be undef. Cannot be source of cycle.
13999     //
14000     // Thus we need only check predecessors of the value operands.
14001     auto *Op = N->getOperand(1).getNode();
14002     if (Visited.insert(Op).second)
14003       Worklist.push_back(Op);
14004   }
14005   // Search through DAG. We can stop early if we find a store node.
14006   for (unsigned i = 0; i < NumStores; ++i)
14007     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14008                                      Max))
14009       return false;
14010   return true;
14011 }
14012
14013 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14014   if (OptLevel == CodeGenOpt::None)
14015     return false;
14016
14017   EVT MemVT = St->getMemoryVT();
14018   int64_t ElementSizeBytes = MemVT.getStoreSize();
14019   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14020
14021   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14022     return false;
14023
14024   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14025       Attribute::NoImplicitFloat);
14026
14027   // This function cannot currently deal with non-byte-sized memory sizes.
14028   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14029     return false;
14030
14031   if (!MemVT.isSimple())
14032     return false;
14033
14034   // Perform an early exit check. Do not bother looking at stored values that
14035   // are not constants, loads, or extracted vector elements.
14036   SDValue StoredVal = peekThroughBitcast(St->getValue());
14037   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14038   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14039                        isa<ConstantFPSDNode>(StoredVal);
14040   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14041                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14042
14043   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14044     return false;
14045
14046   SmallVector<MemOpLink, 8> StoreNodes;
14047   SDNode *RootNode;
14048   // Find potential store merge candidates by searching through chain sub-DAG
14049   getStoreMergeCandidates(St, StoreNodes, RootNode);
14050
14051   // Check if there is anything to merge.
14052   if (StoreNodes.size() < 2)
14053     return false;
14054
14055   // Sort the memory operands according to their distance from the
14056   // base pointer.
14057   llvm::sort(StoreNodes.begin(), StoreNodes.end(),
14058              [](MemOpLink LHS, MemOpLink RHS) {
14059                return LHS.OffsetFromBase < RHS.OffsetFromBase;
14060              });
14061
14062   // Store Merge attempts to merge the lowest stores. This generally
14063   // works out as if successful, as the remaining stores are checked
14064   // after the first collection of stores is merged. However, in the
14065   // case that a non-mergeable store is found first, e.g., {p[-2],
14066   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14067   // mergeable cases. To prevent this, we prune such stores from the
14068   // front of StoreNodes here.
14069
14070   bool RV = false;
14071   while (StoreNodes.size() > 1) {
14072     unsigned StartIdx = 0;
14073     while ((StartIdx + 1 < StoreNodes.size()) &&
14074            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14075                StoreNodes[StartIdx + 1].OffsetFromBase)
14076       ++StartIdx;
14077
14078     // Bail if we don't have enough candidates to merge.
14079     if (StartIdx + 1 >= StoreNodes.size())
14080       return RV;
14081
14082     if (StartIdx)
14083       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14084
14085     // Scan the memory operations on the chain and find the first
14086     // non-consecutive store memory address.
14087     unsigned NumConsecutiveStores = 1;
14088     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14089     // Check that the addresses are consecutive starting from the second
14090     // element in the list of stores.
14091     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14092       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14093       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14094         break;
14095       NumConsecutiveStores = i + 1;
14096     }
14097
14098     if (NumConsecutiveStores < 2) {
14099       StoreNodes.erase(StoreNodes.begin(),
14100                        StoreNodes.begin() + NumConsecutiveStores);
14101       continue;
14102     }
14103
14104     // The node with the lowest store address.
14105     LLVMContext &Context = *DAG.getContext();
14106     const DataLayout &DL = DAG.getDataLayout();
14107
14108     // Store the constants into memory as one consecutive store.
14109     if (IsConstantSrc) {
14110       while (NumConsecutiveStores >= 2) {
14111         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14112         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14113         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14114         unsigned LastLegalType = 1;
14115         unsigned LastLegalVectorType = 1;
14116         bool LastIntegerTrunc = false;
14117         bool NonZero = false;
14118         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14119         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14120           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14121           SDValue StoredVal = ST->getValue();
14122           bool IsElementZero = false;
14123           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14124             IsElementZero = C->isNullValue();
14125           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14126             IsElementZero = C->getConstantFPValue()->isNullValue();
14127           if (IsElementZero) {
14128             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14129               FirstZeroAfterNonZero = i;
14130           }
14131           NonZero |= !IsElementZero;
14132
14133           // Find a legal type for the constant store.
14134           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14135           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14136           bool IsFast = false;
14137
14138           // Break early when size is too large to be legal.
14139           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14140             break;
14141
14142           if (TLI.isTypeLegal(StoreTy) &&
14143               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14144               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14145                                      FirstStoreAlign, &IsFast) &&
14146               IsFast) {
14147             LastIntegerTrunc = false;
14148             LastLegalType = i + 1;
14149             // Or check whether a truncstore is legal.
14150           } else if (TLI.getTypeAction(Context, StoreTy) ==
14151                      TargetLowering::TypePromoteInteger) {
14152             EVT LegalizedStoredValTy =
14153                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14154             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14155                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14156                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14157                                        FirstStoreAlign, &IsFast) &&
14158                 IsFast) {
14159               LastIntegerTrunc = true;
14160               LastLegalType = i + 1;
14161             }
14162           }
14163
14164           // We only use vectors if the constant is known to be zero or the
14165           // target allows it and the function is not marked with the
14166           // noimplicitfloat attribute.
14167           if ((!NonZero ||
14168                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14169               !NoVectors) {
14170             // Find a legal type for the vector store.
14171             unsigned Elts = (i + 1) * NumMemElts;
14172             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14173             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14174                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14175                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14176                                        FirstStoreAlign, &IsFast) &&
14177                 IsFast)
14178               LastLegalVectorType = i + 1;
14179           }
14180         }
14181
14182         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14183         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14184
14185         // Check if we found a legal integer type that creates a meaningful
14186         // merge.
14187         if (NumElem < 2) {
14188           // We know that candidate stores are in order and of correct
14189           // shape. While there is no mergeable sequence from the
14190           // beginning one may start later in the sequence. The only
14191           // reason a merge of size N could have failed where another of
14192           // the same size would not have, is if the alignment has
14193           // improved or we've dropped a non-zero value. Drop as many
14194           // candidates as we can here.
14195           unsigned NumSkip = 1;
14196           while (
14197               (NumSkip < NumConsecutiveStores) &&
14198               (NumSkip < FirstZeroAfterNonZero) &&
14199               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14200             NumSkip++;
14201
14202           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14203           NumConsecutiveStores -= NumSkip;
14204           continue;
14205         }
14206
14207         // Check that we can merge these candidates without causing a cycle.
14208         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14209                                                       RootNode)) {
14210           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14211           NumConsecutiveStores -= NumElem;
14212           continue;
14213         }
14214
14215         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14216                                               UseVector, LastIntegerTrunc);
14217
14218         // Remove merged stores for next iteration.
14219         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14220         NumConsecutiveStores -= NumElem;
14221       }
14222       continue;
14223     }
14224
14225     // When extracting multiple vector elements, try to store them
14226     // in one vector store rather than a sequence of scalar stores.
14227     if (IsExtractVecSrc) {
14228       // Loop on Consecutive Stores on success.
14229       while (NumConsecutiveStores >= 2) {
14230         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14231         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14232         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14233         unsigned NumStoresToMerge = 1;
14234         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14235           // Find a legal type for the vector store.
14236           unsigned Elts = (i + 1) * NumMemElts;
14237           EVT Ty =
14238               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14239           bool IsFast;
14240
14241           // Break early when size is too large to be legal.
14242           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14243             break;
14244
14245           if (TLI.isTypeLegal(Ty) &&
14246               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14247               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14248                                      FirstStoreAlign, &IsFast) &&
14249               IsFast)
14250             NumStoresToMerge = i + 1;
14251         }
14252
14253         // Check if we found a legal integer type creating a meaningful
14254         // merge.
14255         if (NumStoresToMerge < 2) {
14256           // We know that candidate stores are in order and of correct
14257           // shape. While there is no mergeable sequence from the
14258           // beginning one may start later in the sequence. The only
14259           // reason a merge of size N could have failed where another of
14260           // the same size would not have, is if the alignment has
14261           // improved. Drop as many candidates as we can here.
14262           unsigned NumSkip = 1;
14263           while (
14264               (NumSkip < NumConsecutiveStores) &&
14265               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14266             NumSkip++;
14267
14268           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14269           NumConsecutiveStores -= NumSkip;
14270           continue;
14271         }
14272
14273         // Check that we can merge these candidates without causing a cycle.
14274         if (!checkMergeStoreCandidatesForDependencies(
14275                 StoreNodes, NumStoresToMerge, RootNode)) {
14276           StoreNodes.erase(StoreNodes.begin(),
14277                            StoreNodes.begin() + NumStoresToMerge);
14278           NumConsecutiveStores -= NumStoresToMerge;
14279           continue;
14280         }
14281
14282         RV |= MergeStoresOfConstantsOrVecElts(
14283             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14284
14285         StoreNodes.erase(StoreNodes.begin(),
14286                          StoreNodes.begin() + NumStoresToMerge);
14287         NumConsecutiveStores -= NumStoresToMerge;
14288       }
14289       continue;
14290     }
14291
14292     // Below we handle the case of multiple consecutive stores that
14293     // come from multiple consecutive loads. We merge them into a single
14294     // wide load and a single wide store.
14295
14296     // Look for load nodes which are used by the stored values.
14297     SmallVector<MemOpLink, 8> LoadNodes;
14298
14299     // Find acceptable loads. Loads need to have the same chain (token factor),
14300     // must not be zext, volatile, indexed, and they must be consecutive.
14301     BaseIndexOffset LdBasePtr;
14302
14303     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14304       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14305       SDValue Val = peekThroughBitcast(St->getValue());
14306       LoadSDNode *Ld = cast<LoadSDNode>(Val);
14307
14308       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14309       // If this is not the first ptr that we check.
14310       int64_t LdOffset = 0;
14311       if (LdBasePtr.getBase().getNode()) {
14312         // The base ptr must be the same.
14313         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14314           break;
14315       } else {
14316         // Check that all other base pointers are the same as this one.
14317         LdBasePtr = LdPtr;
14318       }
14319
14320       // We found a potential memory operand to merge.
14321       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14322     }
14323
14324     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14325       // If we have load/store pair instructions and we only have two values,
14326       // don't bother merging.
14327       unsigned RequiredAlignment;
14328       if (LoadNodes.size() == 2 &&
14329           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14330           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14331         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14332         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14333         break;
14334       }
14335       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14336       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14337       unsigned FirstStoreAlign = FirstInChain->getAlignment();
14338       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14339       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14340       unsigned FirstLoadAlign = FirstLoad->getAlignment();
14341
14342       // Scan the memory operations on the chain and find the first
14343       // non-consecutive load memory address. These variables hold the index in
14344       // the store node array.
14345
14346       unsigned LastConsecutiveLoad = 1;
14347
14348       // This variable refers to the size and not index in the array.
14349       unsigned LastLegalVectorType = 1;
14350       unsigned LastLegalIntegerType = 1;
14351       bool isDereferenceable = true;
14352       bool DoIntegerTruncate = false;
14353       StartAddress = LoadNodes[0].OffsetFromBase;
14354       SDValue FirstChain = FirstLoad->getChain();
14355       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14356         // All loads must share the same chain.
14357         if (LoadNodes[i].MemNode->getChain() != FirstChain)
14358           break;
14359
14360         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14361         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14362           break;
14363         LastConsecutiveLoad = i;
14364
14365         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14366           isDereferenceable = false;
14367
14368         // Find a legal type for the vector store.
14369         unsigned Elts = (i + 1) * NumMemElts;
14370         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14371
14372         // Break early when size is too large to be legal.
14373         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14374           break;
14375
14376         bool IsFastSt, IsFastLd;
14377         if (TLI.isTypeLegal(StoreTy) &&
14378             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14379             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14380                                    FirstStoreAlign, &IsFastSt) &&
14381             IsFastSt &&
14382             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14383                                    FirstLoadAlign, &IsFastLd) &&
14384             IsFastLd) {
14385           LastLegalVectorType = i + 1;
14386         }
14387
14388         // Find a legal type for the integer store.
14389         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14390         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14391         if (TLI.isTypeLegal(StoreTy) &&
14392             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14393             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14394                                    FirstStoreAlign, &IsFastSt) &&
14395             IsFastSt &&
14396             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14397                                    FirstLoadAlign, &IsFastLd) &&
14398             IsFastLd) {
14399           LastLegalIntegerType = i + 1;
14400           DoIntegerTruncate = false;
14401           // Or check whether a truncstore and extload is legal.
14402         } else if (TLI.getTypeAction(Context, StoreTy) ==
14403                    TargetLowering::TypePromoteInteger) {
14404           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14405           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14406               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14407               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14408                                  StoreTy) &&
14409               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14410                                  StoreTy) &&
14411               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14412               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14413                                      FirstStoreAlign, &IsFastSt) &&
14414               IsFastSt &&
14415               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14416                                      FirstLoadAlign, &IsFastLd) &&
14417               IsFastLd) {
14418             LastLegalIntegerType = i + 1;
14419             DoIntegerTruncate = true;
14420           }
14421         }
14422       }
14423
14424       // Only use vector types if the vector type is larger than the integer
14425       // type. If they are the same, use integers.
14426       bool UseVectorTy =
14427           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14428       unsigned LastLegalType =
14429           std::max(LastLegalVectorType, LastLegalIntegerType);
14430
14431       // We add +1 here because the LastXXX variables refer to location while
14432       // the NumElem refers to array/index size.
14433       unsigned NumElem =
14434           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14435       NumElem = std::min(LastLegalType, NumElem);
14436
14437       if (NumElem < 2) {
14438         // We know that candidate stores are in order and of correct
14439         // shape. While there is no mergeable sequence from the
14440         // beginning one may start later in the sequence. The only
14441         // reason a merge of size N could have failed where another of
14442         // the same size would not have is if the alignment or either
14443         // the load or store has improved. Drop as many candidates as we
14444         // can here.
14445         unsigned NumSkip = 1;
14446         while ((NumSkip < LoadNodes.size()) &&
14447                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14448                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14449           NumSkip++;
14450         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14451         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14452         NumConsecutiveStores -= NumSkip;
14453         continue;
14454       }
14455
14456       // Check that we can merge these candidates without causing a cycle.
14457       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14458                                                     RootNode)) {
14459         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14460         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14461         NumConsecutiveStores -= NumElem;
14462         continue;
14463       }
14464
14465       // Find if it is better to use vectors or integers to load and store
14466       // to memory.
14467       EVT JointMemOpVT;
14468       if (UseVectorTy) {
14469         // Find a legal type for the vector store.
14470         unsigned Elts = NumElem * NumMemElts;
14471         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14472       } else {
14473         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14474         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14475       }
14476
14477       SDLoc LoadDL(LoadNodes[0].MemNode);
14478       SDLoc StoreDL(StoreNodes[0].MemNode);
14479
14480       // The merged loads are required to have the same incoming chain, so
14481       // using the first's chain is acceptable.
14482
14483       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14484       AddToWorklist(NewStoreChain.getNode());
14485
14486       MachineMemOperand::Flags MMOFlags =
14487           isDereferenceable ? MachineMemOperand::MODereferenceable
14488                             : MachineMemOperand::MONone;
14489
14490       SDValue NewLoad, NewStore;
14491       if (UseVectorTy || !DoIntegerTruncate) {
14492         NewLoad =
14493             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14494                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14495                         FirstLoadAlign, MMOFlags);
14496         NewStore = DAG.getStore(
14497             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14498             FirstInChain->getPointerInfo(), FirstStoreAlign);
14499       } else { // This must be the truncstore/extload case
14500         EVT ExtendedTy =
14501             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14502         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14503                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
14504                                  FirstLoad->getPointerInfo(), JointMemOpVT,
14505                                  FirstLoadAlign, MMOFlags);
14506         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14507                                      FirstInChain->getBasePtr(),
14508                                      FirstInChain->getPointerInfo(),
14509                                      JointMemOpVT, FirstInChain->getAlignment(),
14510                                      FirstInChain->getMemOperand()->getFlags());
14511       }
14512
14513       // Transfer chain users from old loads to the new load.
14514       for (unsigned i = 0; i < NumElem; ++i) {
14515         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14516         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14517                                       SDValue(NewLoad.getNode(), 1));
14518       }
14519
14520       // Replace the all stores with the new store. Recursively remove
14521       // corresponding value if its no longer used.
14522       for (unsigned i = 0; i < NumElem; ++i) {
14523         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14524         CombineTo(StoreNodes[i].MemNode, NewStore);
14525         if (Val.getNode()->use_empty())
14526           recursivelyDeleteUnusedNodes(Val.getNode());
14527       }
14528
14529       RV = true;
14530       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14531       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14532       NumConsecutiveStores -= NumElem;
14533     }
14534   }
14535   return RV;
14536 }
14537
14538 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14539   SDLoc SL(ST);
14540   SDValue ReplStore;
14541
14542   // Replace the chain to avoid dependency.
14543   if (ST->isTruncatingStore()) {
14544     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14545                                   ST->getBasePtr(), ST->getMemoryVT(),
14546                                   ST->getMemOperand());
14547   } else {
14548     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14549                              ST->getMemOperand());
14550   }
14551
14552   // Create token to keep both nodes around.
14553   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14554                               MVT::Other, ST->getChain(), ReplStore);
14555
14556   // Make sure the new and old chains are cleaned up.
14557   AddToWorklist(Token.getNode());
14558
14559   // Don't add users to work list.
14560   return CombineTo(ST, Token, false);
14561 }
14562
14563 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14564   SDValue Value = ST->getValue();
14565   if (Value.getOpcode() == ISD::TargetConstantFP)
14566     return SDValue();
14567
14568   SDLoc DL(ST);
14569
14570   SDValue Chain = ST->getChain();
14571   SDValue Ptr = ST->getBasePtr();
14572
14573   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14574
14575   // NOTE: If the original store is volatile, this transform must not increase
14576   // the number of stores.  For example, on x86-32 an f64 can be stored in one
14577   // processor operation but an i64 (which is not legal) requires two.  So the
14578   // transform should not be done in this case.
14579
14580   SDValue Tmp;
14581   switch (CFP->getSimpleValueType(0).SimpleTy) {
14582   default:
14583     llvm_unreachable("Unknown FP type");
14584   case MVT::f16:    // We don't do this for these yet.
14585   case MVT::f80:
14586   case MVT::f128:
14587   case MVT::ppcf128:
14588     return SDValue();
14589   case MVT::f32:
14590     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14591         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14592       ;
14593       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14594                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14595                             MVT::i32);
14596       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14597     }
14598
14599     return SDValue();
14600   case MVT::f64:
14601     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14602          !ST->isVolatile()) ||
14603         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14604       ;
14605       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14606                             getZExtValue(), SDLoc(CFP), MVT::i64);
14607       return DAG.getStore(Chain, DL, Tmp,
14608                           Ptr, ST->getMemOperand());
14609     }
14610
14611     if (!ST->isVolatile() &&
14612         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14613       // Many FP stores are not made apparent until after legalize, e.g. for
14614       // argument passing.  Since this is so common, custom legalize the
14615       // 64-bit integer store into two 32-bit stores.
14616       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14617       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14618       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14619       if (DAG.getDataLayout().isBigEndian())
14620         std::swap(Lo, Hi);
14621
14622       unsigned Alignment = ST->getAlignment();
14623       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14624       AAMDNodes AAInfo = ST->getAAInfo();
14625
14626       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14627                                  ST->getAlignment(), MMOFlags, AAInfo);
14628       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14629                         DAG.getConstant(4, DL, Ptr.getValueType()));
14630       Alignment = MinAlign(Alignment, 4U);
14631       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14632                                  ST->getPointerInfo().getWithOffset(4),
14633                                  Alignment, MMOFlags, AAInfo);
14634       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14635                          St0, St1);
14636     }
14637
14638     return SDValue();
14639   }
14640 }
14641
14642 SDValue DAGCombiner::visitSTORE(SDNode *N) {
14643   StoreSDNode *ST  = cast<StoreSDNode>(N);
14644   SDValue Chain = ST->getChain();
14645   SDValue Value = ST->getValue();
14646   SDValue Ptr   = ST->getBasePtr();
14647
14648   // If this is a store of a bit convert, store the input value if the
14649   // resultant store does not need a higher alignment than the original.
14650   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
14651       ST->isUnindexed()) {
14652     EVT SVT = Value.getOperand(0).getValueType();
14653     if (((!LegalOperations && !ST->isVolatile()) ||
14654          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
14655         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
14656       unsigned OrigAlign = ST->getAlignment();
14657       bool Fast = false;
14658       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
14659                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
14660           Fast) {
14661         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
14662                             ST->getPointerInfo(), OrigAlign,
14663                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
14664       }
14665     }
14666   }
14667
14668   // Turn 'store undef, Ptr' -> nothing.
14669   if (Value.isUndef() && ST->isUnindexed())
14670     return Chain;
14671
14672   // Try to infer better alignment information than the store already has.
14673   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
14674     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14675       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
14676         SDValue NewStore =
14677             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
14678                               ST->getMemoryVT(), Align,
14679                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
14680         // NewStore will always be N as we are only refining the alignment
14681         assert(NewStore.getNode() == N);
14682         (void)NewStore;
14683       }
14684     }
14685   }
14686
14687   // Try transforming a pair floating point load / store ops to integer
14688   // load / store ops.
14689   if (SDValue NewST = TransformFPLoadStorePair(N))
14690     return NewST;
14691
14692   if (ST->isUnindexed()) {
14693     // Walk up chain skipping non-aliasing memory nodes, on this store and any
14694     // adjacent stores.
14695     if (findBetterNeighborChains(ST)) {
14696       // replaceStoreChain uses CombineTo, which handled all of the worklist
14697       // manipulation. Return the original node to not do anything else.
14698       return SDValue(ST, 0);
14699     }
14700     Chain = ST->getChain();
14701   }
14702
14703   // FIXME: is there such a thing as a truncating indexed store?
14704   if (ST->isTruncatingStore() && ST->isUnindexed() &&
14705       Value.getValueType().isInteger()) {
14706     // See if we can simplify the input to this truncstore with knowledge that
14707     // only the low bits are being used.  For example:
14708     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
14709     SDValue Shorter = DAG.GetDemandedBits(
14710         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
14711                                     ST->getMemoryVT().getScalarSizeInBits()));
14712     AddToWorklist(Value.getNode());
14713     if (Shorter.getNode())
14714       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
14715                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
14716
14717     // Otherwise, see if we can simplify the operation with
14718     // SimplifyDemandedBits, which only works if the value has a single use.
14719     if (SimplifyDemandedBits(
14720             Value,
14721             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
14722                                  ST->getMemoryVT().getScalarSizeInBits()))) {
14723       // Re-visit the store if anything changed and the store hasn't been merged
14724       // with another node (N is deleted) SimplifyDemandedBits will add Value's
14725       // node back to the worklist if necessary, but we also need to re-visit
14726       // the Store node itself.
14727       if (N->getOpcode() != ISD::DELETED_NODE)
14728         AddToWorklist(N);
14729       return SDValue(N, 0);
14730     }
14731   }
14732
14733   // If this is a load followed by a store to the same location, then the store
14734   // is dead/noop.
14735   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
14736     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
14737         ST->isUnindexed() && !ST->isVolatile() &&
14738         // There can't be any side effects between the load and store, such as
14739         // a call or store.
14740         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
14741       // The store is dead, remove it.
14742       return Chain;
14743     }
14744   }
14745
14746   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
14747     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
14748         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
14749         ST->getMemoryVT() == ST1->getMemoryVT()) {
14750       // If this is a store followed by a store with the same value to the same
14751       // location, then the store is dead/noop.
14752       if (ST1->getValue() == Value) {
14753         // The store is dead, remove it.
14754         return Chain;
14755       }
14756
14757       // If this is a store who's preceeding store to the same location
14758       // and no one other node is chained to that store we can effectively
14759       // drop the store. Do not remove stores to undef as they may be used as
14760       // data sinks.
14761       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
14762           !ST1->getBasePtr().isUndef()) {
14763         // ST1 is fully overwritten and can be elided. Combine with it's chain
14764         // value.
14765         CombineTo(ST1, ST1->getChain());
14766         return SDValue();
14767       }
14768     }
14769   }
14770
14771   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
14772   // truncating store.  We can do this even if this is already a truncstore.
14773   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
14774       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
14775       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
14776                             ST->getMemoryVT())) {
14777     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
14778                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
14779   }
14780
14781   // Always perform this optimization before types are legal. If the target
14782   // prefers, also try this after legalization to catch stores that were created
14783   // by intrinsics or other nodes.
14784   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
14785     while (true) {
14786       // There can be multiple store sequences on the same chain.
14787       // Keep trying to merge store sequences until we are unable to do so
14788       // or until we merge the last store on the chain.
14789       bool Changed = MergeConsecutiveStores(ST);
14790       if (!Changed) break;
14791       // Return N as merge only uses CombineTo and no worklist clean
14792       // up is necessary.
14793       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
14794         return SDValue(N, 0);
14795     }
14796   }
14797
14798   // Try transforming N to an indexed store.
14799   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14800     return SDValue(N, 0);
14801
14802   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
14803   //
14804   // Make sure to do this only after attempting to merge stores in order to
14805   //  avoid changing the types of some subset of stores due to visit order,
14806   //  preventing their merging.
14807   if (isa<ConstantFPSDNode>(ST->getValue())) {
14808     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
14809       return NewSt;
14810   }
14811
14812   if (SDValue NewSt = splitMergedValStore(ST))
14813     return NewSt;
14814
14815   return ReduceLoadOpStoreWidth(N);
14816 }
14817
14818 /// For the instruction sequence of store below, F and I values
14819 /// are bundled together as an i64 value before being stored into memory.
14820 /// Sometimes it is more efficent to generate separate stores for F and I,
14821 /// which can remove the bitwise instructions or sink them to colder places.
14822 ///
14823 ///   (store (or (zext (bitcast F to i32) to i64),
14824 ///              (shl (zext I to i64), 32)), addr)  -->
14825 ///   (store F, addr) and (store I, addr+4)
14826 ///
14827 /// Similarly, splitting for other merged store can also be beneficial, like:
14828 /// For pair of {i32, i32}, i64 store --> two i32 stores.
14829 /// For pair of {i32, i16}, i64 store --> two i32 stores.
14830 /// For pair of {i16, i16}, i32 store --> two i16 stores.
14831 /// For pair of {i16, i8},  i32 store --> two i16 stores.
14832 /// For pair of {i8, i8},   i16 store --> two i8 stores.
14833 ///
14834 /// We allow each target to determine specifically which kind of splitting is
14835 /// supported.
14836 ///
14837 /// The store patterns are commonly seen from the simple code snippet below
14838 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
14839 ///   void goo(const std::pair<int, float> &);
14840 ///   hoo() {
14841 ///     ...
14842 ///     goo(std::make_pair(tmp, ftmp));
14843 ///     ...
14844 ///   }
14845 ///
14846 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
14847   if (OptLevel == CodeGenOpt::None)
14848     return SDValue();
14849
14850   SDValue Val = ST->getValue();
14851   SDLoc DL(ST);
14852
14853   // Match OR operand.
14854   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
14855     return SDValue();
14856
14857   // Match SHL operand and get Lower and Higher parts of Val.
14858   SDValue Op1 = Val.getOperand(0);
14859   SDValue Op2 = Val.getOperand(1);
14860   SDValue Lo, Hi;
14861   if (Op1.getOpcode() != ISD::SHL) {
14862     std::swap(Op1, Op2);
14863     if (Op1.getOpcode() != ISD::SHL)
14864       return SDValue();
14865   }
14866   Lo = Op2;
14867   Hi = Op1.getOperand(0);
14868   if (!Op1.hasOneUse())
14869     return SDValue();
14870
14871   // Match shift amount to HalfValBitSize.
14872   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
14873   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
14874   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
14875     return SDValue();
14876
14877   // Lo and Hi are zero-extended from int with size less equal than 32
14878   // to i64.
14879   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
14880       !Lo.getOperand(0).getValueType().isScalarInteger() ||
14881       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
14882       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
14883       !Hi.getOperand(0).getValueType().isScalarInteger() ||
14884       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
14885     return SDValue();
14886
14887   // Use the EVT of low and high parts before bitcast as the input
14888   // of target query.
14889   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
14890                   ? Lo.getOperand(0).getValueType()
14891                   : Lo.getValueType();
14892   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
14893                    ? Hi.getOperand(0).getValueType()
14894                    : Hi.getValueType();
14895   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
14896     return SDValue();
14897
14898   // Start to split store.
14899   unsigned Alignment = ST->getAlignment();
14900   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14901   AAMDNodes AAInfo = ST->getAAInfo();
14902
14903   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
14904   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
14905   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
14906   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
14907
14908   SDValue Chain = ST->getChain();
14909   SDValue Ptr = ST->getBasePtr();
14910   // Lower value store.
14911   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14912                              ST->getAlignment(), MMOFlags, AAInfo);
14913   Ptr =
14914       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14915                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
14916   // Higher value store.
14917   SDValue St1 =
14918       DAG.getStore(St0, DL, Hi, Ptr,
14919                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14920                    Alignment / 2, MMOFlags, AAInfo);
14921   return St1;
14922 }
14923
14924 /// Convert a disguised subvector insertion into a shuffle:
14925 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14926 /// bitcast(shuffle (bitcast V), (extended X), Mask)
14927 /// Note: We do not use an insert_subvector node because that requires a legal
14928 /// subvector type.
14929 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14930   SDValue InsertVal = N->getOperand(1);
14931   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14932       !InsertVal.getOperand(0).getValueType().isVector())
14933     return SDValue();
14934
14935   SDValue SubVec = InsertVal.getOperand(0);
14936   SDValue DestVec = N->getOperand(0);
14937   EVT SubVecVT = SubVec.getValueType();
14938   EVT VT = DestVec.getValueType();
14939   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14940   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14941   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14942
14943   // Step 1: Create a shuffle mask that implements this insert operation. The
14944   // vector that we are inserting into will be operand 0 of the shuffle, so
14945   // those elements are just 'i'. The inserted subvector is in the first
14946   // positions of operand 1 of the shuffle. Example:
14947   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14948   SmallVector<int, 16> Mask(NumMaskVals);
14949   for (unsigned i = 0; i != NumMaskVals; ++i) {
14950     if (i / NumSrcElts == InsIndex)
14951       Mask[i] = (i % NumSrcElts) + NumMaskVals;
14952     else
14953       Mask[i] = i;
14954   }
14955
14956   // Bail out if the target can not handle the shuffle we want to create.
14957   EVT SubVecEltVT = SubVecVT.getVectorElementType();
14958   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14959   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14960     return SDValue();
14961
14962   // Step 2: Create a wide vector from the inserted source vector by appending
14963   // undefined elements. This is the same size as our destination vector.
14964   SDLoc DL(N);
14965   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14966   ConcatOps[0] = SubVec;
14967   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
14968
14969   // Step 3: Shuffle in the padded subvector.
14970   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
14971   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14972   AddToWorklist(PaddedSubV.getNode());
14973   AddToWorklist(DestVecBC.getNode());
14974   AddToWorklist(Shuf.getNode());
14975   return DAG.getBitcast(VT, Shuf);
14976 }
14977
14978 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
14979   SDValue InVec = N->getOperand(0);
14980   SDValue InVal = N->getOperand(1);
14981   SDValue EltNo = N->getOperand(2);
14982   SDLoc DL(N);
14983
14984   // If the inserted element is an UNDEF, just use the input vector.
14985   if (InVal.isUndef())
14986     return InVec;
14987
14988   EVT VT = InVec.getValueType();
14989
14990   // Remove redundant insertions:
14991   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
14992   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14993       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
14994     return InVec;
14995
14996   // We must know which element is being inserted for folds below here.
14997   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14998   if (!IndexC)
14999     return SDValue();
15000   unsigned Elt = IndexC->getZExtValue();
15001
15002   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15003     return Shuf;
15004
15005   // Canonicalize insert_vector_elt dag nodes.
15006   // Example:
15007   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15008   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15009   //
15010   // Do this only if the child insert_vector node has one use; also
15011   // do this only if indices are both constants and Idx1 < Idx0.
15012   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15013       && isa<ConstantSDNode>(InVec.getOperand(2))) {
15014     unsigned OtherElt = InVec.getConstantOperandVal(2);
15015     if (Elt < OtherElt) {
15016       // Swap nodes.
15017       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15018                                   InVec.getOperand(0), InVal, EltNo);
15019       AddToWorklist(NewOp.getNode());
15020       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15021                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15022     }
15023   }
15024
15025   // If we can't generate a legal BUILD_VECTOR, exit
15026   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15027     return SDValue();
15028
15029   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15030   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
15031   // vector elements.
15032   SmallVector<SDValue, 8> Ops;
15033   // Do not combine these two vectors if the output vector will not replace
15034   // the input vector.
15035   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15036     Ops.append(InVec.getNode()->op_begin(),
15037                InVec.getNode()->op_end());
15038   } else if (InVec.isUndef()) {
15039     unsigned NElts = VT.getVectorNumElements();
15040     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
15041   } else {
15042     return SDValue();
15043   }
15044
15045   // Insert the element
15046   if (Elt < Ops.size()) {
15047     // All the operands of BUILD_VECTOR must have the same type;
15048     // we enforce that here.
15049     EVT OpVT = Ops[0].getValueType();
15050     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15051   }
15052
15053   // Return the new vector
15054   return DAG.getBuildVector(VT, DL, Ops);
15055 }
15056
15057 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
15058     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
15059   assert(!OriginalLoad->isVolatile());
15060
15061   EVT ResultVT = EVE->getValueType(0);
15062   EVT VecEltVT = InVecVT.getVectorElementType();
15063   unsigned Align = OriginalLoad->getAlignment();
15064   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15065       VecEltVT.getTypeForEVT(*DAG.getContext()));
15066
15067   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15068     return SDValue();
15069
15070   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15071     ISD::NON_EXTLOAD : ISD::EXTLOAD;
15072   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15073     return SDValue();
15074
15075   Align = NewAlign;
15076
15077   SDValue NewPtr = OriginalLoad->getBasePtr();
15078   SDValue Offset;
15079   EVT PtrType = NewPtr.getValueType();
15080   MachinePointerInfo MPI;
15081   SDLoc DL(EVE);
15082   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15083     int Elt = ConstEltNo->getZExtValue();
15084     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15085     Offset = DAG.getConstant(PtrOff, DL, PtrType);
15086     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15087   } else {
15088     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15089     Offset = DAG.getNode(
15090         ISD::MUL, DL, PtrType, Offset,
15091         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15092     MPI = OriginalLoad->getPointerInfo();
15093   }
15094   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15095
15096   // The replacement we need to do here is a little tricky: we need to
15097   // replace an extractelement of a load with a load.
15098   // Use ReplaceAllUsesOfValuesWith to do the replacement.
15099   // Note that this replacement assumes that the extractvalue is the only
15100   // use of the load; that's okay because we don't want to perform this
15101   // transformation in other cases anyway.
15102   SDValue Load;
15103   SDValue Chain;
15104   if (ResultVT.bitsGT(VecEltVT)) {
15105     // If the result type of vextract is wider than the load, then issue an
15106     // extending load instead.
15107     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15108                                                   VecEltVT)
15109                                    ? ISD::ZEXTLOAD
15110                                    : ISD::EXTLOAD;
15111     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15112                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15113                           Align, OriginalLoad->getMemOperand()->getFlags(),
15114                           OriginalLoad->getAAInfo());
15115     Chain = Load.getValue(1);
15116   } else {
15117     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15118                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15119                        OriginalLoad->getAAInfo());
15120     Chain = Load.getValue(1);
15121     if (ResultVT.bitsLT(VecEltVT))
15122       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15123     else
15124       Load = DAG.getBitcast(ResultVT, Load);
15125   }
15126   WorklistRemover DeadNodes(*this);
15127   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15128   SDValue To[] = { Load, Chain };
15129   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15130   // Since we're explicitly calling ReplaceAllUses, add the new node to the
15131   // worklist explicitly as well.
15132   AddToWorklist(Load.getNode());
15133   AddUsersToWorklist(Load.getNode()); // Add users too
15134   // Make sure to revisit this node to clean it up; it will usually be dead.
15135   AddToWorklist(EVE);
15136   ++OpsNarrowed;
15137   return SDValue(EVE, 0);
15138 }
15139
15140 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15141   // (vextract (scalar_to_vector val, 0) -> val
15142   SDValue InVec = N->getOperand(0);
15143   EVT VT = InVec.getValueType();
15144   EVT NVT = N->getValueType(0);
15145
15146   if (InVec.isUndef())
15147     return DAG.getUNDEF(NVT);
15148
15149   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15150     // Check if the result type doesn't match the inserted element type. A
15151     // SCALAR_TO_VECTOR may truncate the inserted element and the
15152     // EXTRACT_VECTOR_ELT may widen the extracted vector.
15153     SDValue InOp = InVec.getOperand(0);
15154     if (InOp.getValueType() != NVT) {
15155       assert(InOp.getValueType().isInteger() && NVT.isInteger());
15156       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
15157     }
15158     return InOp;
15159   }
15160
15161   SDValue EltNo = N->getOperand(1);
15162   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
15163
15164   // extract_vector_elt of out-of-bounds element -> UNDEF
15165   if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
15166     return DAG.getUNDEF(NVT);
15167
15168   // extract_vector_elt (build_vector x, y), 1 -> y
15169   if (ConstEltNo &&
15170       InVec.getOpcode() == ISD::BUILD_VECTOR &&
15171       TLI.isTypeLegal(VT) &&
15172       (InVec.hasOneUse() ||
15173        TLI.aggressivelyPreferBuildVectorSources(VT))) {
15174     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
15175     EVT InEltVT = Elt.getValueType();
15176
15177     // Sometimes build_vector's scalar input types do not match result type.
15178     if (NVT == InEltVT)
15179       return Elt;
15180
15181     // TODO: It may be useful to truncate if free if the build_vector implicitly
15182     // converts.
15183   }
15184
15185   // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
15186   bool isLE = DAG.getDataLayout().isLittleEndian();
15187   unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
15188   if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
15189       ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
15190     SDValue BCSrc = InVec.getOperand(0);
15191     if (BCSrc.getValueType().isScalarInteger())
15192       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
15193   }
15194
15195   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15196   //
15197   // This only really matters if the index is non-constant since other combines
15198   // on the constant elements already work.
15199   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15200       EltNo == InVec.getOperand(2)) {
15201     SDValue Elt = InVec.getOperand(1);
15202     return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
15203   }
15204
15205   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
15206   // We only perform this optimization before the op legalization phase because
15207   // we may introduce new vector instructions which are not backed by TD
15208   // patterns. For example on AVX, extracting elements from a wide vector
15209   // without using extract_subvector. However, if we can find an underlying
15210   // scalar value, then we can always use that.
15211   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
15212     int NumElem = VT.getVectorNumElements();
15213     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
15214     // Find the new index to extract from.
15215     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
15216
15217     // Extracting an undef index is undef.
15218     if (OrigElt == -1)
15219       return DAG.getUNDEF(NVT);
15220
15221     // Select the right vector half to extract from.
15222     SDValue SVInVec;
15223     if (OrigElt < NumElem) {
15224       SVInVec = InVec->getOperand(0);
15225     } else {
15226       SVInVec = InVec->getOperand(1);
15227       OrigElt -= NumElem;
15228     }
15229
15230     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
15231       SDValue InOp = SVInVec.getOperand(OrigElt);
15232       if (InOp.getValueType() != NVT) {
15233         assert(InOp.getValueType().isInteger() && NVT.isInteger());
15234         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
15235       }
15236
15237       return InOp;
15238     }
15239
15240     // FIXME: We should handle recursing on other vector shuffles and
15241     // scalar_to_vector here as well.
15242
15243     if (!LegalOperations ||
15244         // FIXME: Should really be just isOperationLegalOrCustom.
15245         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
15246         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
15247       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15248       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
15249                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
15250     }
15251   }
15252
15253   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15254   // simplify it based on the (valid) extraction indices.
15255   if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
15256         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15257                Use->getOperand(0) == InVec &&
15258                isa<ConstantSDNode>(Use->getOperand(1));
15259       })) {
15260     APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
15261     for (SDNode *Use : InVec->uses()) {
15262       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15263       if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
15264         DemandedElts.setBit(CstElt->getZExtValue());
15265     }
15266     if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
15267       return SDValue(N, 0);
15268   }
15269
15270   bool BCNumEltsChanged = false;
15271   EVT ExtVT = VT.getVectorElementType();
15272   EVT LVT = ExtVT;
15273
15274   // If the result of load has to be truncated, then it's not necessarily
15275   // profitable.
15276   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
15277     return SDValue();
15278
15279   if (InVec.getOpcode() == ISD::BITCAST) {
15280     // Don't duplicate a load with other uses.
15281     if (!InVec.hasOneUse())
15282       return SDValue();
15283
15284     EVT BCVT = InVec.getOperand(0).getValueType();
15285     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
15286       return SDValue();
15287     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
15288       BCNumEltsChanged = true;
15289     InVec = InVec.getOperand(0);
15290     ExtVT = BCVT.getVectorElementType();
15291   }
15292
15293   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
15294   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
15295       ISD::isNormalLoad(InVec.getNode()) &&
15296       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
15297     SDValue Index = N->getOperand(1);
15298     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
15299       if (!OrigLoad->isVolatile()) {
15300         return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
15301                                                              OrigLoad);
15302       }
15303     }
15304   }
15305
15306   // Perform only after legalization to ensure build_vector / vector_shuffle
15307   // optimizations have already been done.
15308   if (!LegalOperations) return SDValue();
15309
15310   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
15311   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
15312   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
15313
15314   if (ConstEltNo) {
15315     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
15316
15317     LoadSDNode *LN0 = nullptr;
15318     const ShuffleVectorSDNode *SVN = nullptr;
15319     if (ISD::isNormalLoad(InVec.getNode())) {
15320       LN0 = cast<LoadSDNode>(InVec);
15321     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15322                InVec.getOperand(0).getValueType() == ExtVT &&
15323                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
15324       // Don't duplicate a load with other uses.
15325       if (!InVec.hasOneUse())
15326         return SDValue();
15327
15328       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
15329     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
15330       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
15331       // =>
15332       // (load $addr+1*size)
15333
15334       // Don't duplicate a load with other uses.
15335       if (!InVec.hasOneUse())
15336         return SDValue();
15337
15338       // If the bit convert changed the number of elements, it is unsafe
15339       // to examine the mask.
15340       if (BCNumEltsChanged)
15341         return SDValue();
15342
15343       // Select the input vector, guarding against out of range extract vector.
15344       unsigned NumElems = VT.getVectorNumElements();
15345       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
15346       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
15347
15348       if (InVec.getOpcode() == ISD::BITCAST) {
15349         // Don't duplicate a load with other uses.
15350         if (!InVec.hasOneUse())
15351           return SDValue();
15352
15353         InVec = InVec.getOperand(0);
15354       }
15355       if (ISD::isNormalLoad(InVec.getNode())) {
15356         LN0 = cast<LoadSDNode>(InVec);
15357         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
15358         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
15359       }
15360     }
15361
15362     // Make sure we found a non-volatile load and the extractelement is
15363     // the only use.
15364     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
15365       return SDValue();
15366
15367     // If Idx was -1 above, Elt is going to be -1, so just return undef.
15368     if (Elt == -1)
15369       return DAG.getUNDEF(LVT);
15370
15371     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
15372   }
15373
15374   return SDValue();
15375 }
15376
15377 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15378 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15379   // We perform this optimization post type-legalization because
15380   // the type-legalizer often scalarizes integer-promoted vectors.
15381   // Performing this optimization before may create bit-casts which
15382   // will be type-legalized to complex code sequences.
15383   // We perform this optimization only before the operation legalizer because we
15384   // may introduce illegal operations.
15385   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15386     return SDValue();
15387
15388   unsigned NumInScalars = N->getNumOperands();
15389   SDLoc DL(N);
15390   EVT VT = N->getValueType(0);
15391
15392   // Check to see if this is a BUILD_VECTOR of a bunch of values
15393   // which come from any_extend or zero_extend nodes. If so, we can create
15394   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15395   // optimizations. We do not handle sign-extend because we can't fill the sign
15396   // using shuffles.
15397   EVT SourceType = MVT::Other;
15398   bool AllAnyExt = true;
15399
15400   for (unsigned i = 0; i != NumInScalars; ++i) {
15401     SDValue In = N->getOperand(i);
15402     // Ignore undef inputs.
15403     if (In.isUndef()) continue;
15404
15405     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
15406     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15407
15408     // Abort if the element is not an extension.
15409     if (!ZeroExt && !AnyExt) {
15410       SourceType = MVT::Other;
15411       break;
15412     }
15413
15414     // The input is a ZeroExt or AnyExt. Check the original type.
15415     EVT InTy = In.getOperand(0).getValueType();
15416
15417     // Check that all of the widened source types are the same.
15418     if (SourceType == MVT::Other)
15419       // First time.
15420       SourceType = InTy;
15421     else if (InTy != SourceType) {
15422       // Multiple income types. Abort.
15423       SourceType = MVT::Other;
15424       break;
15425     }
15426
15427     // Check if all of the extends are ANY_EXTENDs.
15428     AllAnyExt &= AnyExt;
15429   }
15430
15431   // In order to have valid types, all of the inputs must be extended from the
15432   // same source type and all of the inputs must be any or zero extend.
15433   // Scalar sizes must be a power of two.
15434   EVT OutScalarTy = VT.getScalarType();
15435   bool ValidTypes = SourceType != MVT::Other &&
15436                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15437                  isPowerOf2_32(SourceType.getSizeInBits());
15438
15439   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15440   // turn into a single shuffle instruction.
15441   if (!ValidTypes)
15442     return SDValue();
15443
15444   bool isLE = DAG.getDataLayout().isLittleEndian();
15445   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15446   assert(ElemRatio > 1 && "Invalid element size ratio");
15447   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15448                                DAG.getConstant(0, DL, SourceType);
15449
15450   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15451   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15452
15453   // Populate the new build_vector
15454   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15455     SDValue Cast = N->getOperand(i);
15456     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15457             Cast.getOpcode() == ISD::ZERO_EXTEND ||
15458             Cast.isUndef()) && "Invalid cast opcode");
15459     SDValue In;
15460     if (Cast.isUndef())
15461       In = DAG.getUNDEF(SourceType);
15462     else
15463       In = Cast->getOperand(0);
15464     unsigned Index = isLE ? (i * ElemRatio) :
15465                             (i * ElemRatio + (ElemRatio - 1));
15466
15467     assert(Index < Ops.size() && "Invalid index");
15468     Ops[Index] = In;
15469   }
15470
15471   // The type of the new BUILD_VECTOR node.
15472   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15473   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15474          "Invalid vector size");
15475   // Check if the new vector type is legal.
15476   if (!isTypeLegal(VecVT) ||
15477       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15478        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15479     return SDValue();
15480
15481   // Make the new BUILD_VECTOR.
15482   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15483
15484   // The new BUILD_VECTOR node has the potential to be further optimized.
15485   AddToWorklist(BV.getNode());
15486   // Bitcast to the desired type.
15487   return DAG.getBitcast(VT, BV);
15488 }
15489
15490 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
15491   EVT VT = N->getValueType(0);
15492
15493   unsigned NumInScalars = N->getNumOperands();
15494   SDLoc DL(N);
15495
15496   EVT SrcVT = MVT::Other;
15497   unsigned Opcode = ISD::DELETED_NODE;
15498   unsigned NumDefs = 0;
15499
15500   for (unsigned i = 0; i != NumInScalars; ++i) {
15501     SDValue In = N->getOperand(i);
15502     unsigned Opc = In.getOpcode();
15503
15504     if (Opc == ISD::UNDEF)
15505       continue;
15506
15507     // If all scalar values are floats and converted from integers.
15508     if (Opcode == ISD::DELETED_NODE &&
15509         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
15510       Opcode = Opc;
15511     }
15512
15513     if (Opc != Opcode)
15514       return SDValue();
15515
15516     EVT InVT = In.getOperand(0).getValueType();
15517
15518     // If all scalar values are typed differently, bail out. It's chosen to
15519     // simplify BUILD_VECTOR of integer types.
15520     if (SrcVT == MVT::Other)
15521       SrcVT = InVT;
15522     if (SrcVT != InVT)
15523       return SDValue();
15524     NumDefs++;
15525   }
15526
15527   // If the vector has just one element defined, it's not worth to fold it into
15528   // a vectorized one.
15529   if (NumDefs < 2)
15530     return SDValue();
15531
15532   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
15533          && "Should only handle conversion from integer to float.");
15534   assert(SrcVT != MVT::Other && "Cannot determine source type!");
15535
15536   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
15537
15538   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
15539     return SDValue();
15540
15541   // Just because the floating-point vector type is legal does not necessarily
15542   // mean that the corresponding integer vector type is.
15543   if (!isTypeLegal(NVT))
15544     return SDValue();
15545
15546   SmallVector<SDValue, 8> Opnds;
15547   for (unsigned i = 0; i != NumInScalars; ++i) {
15548     SDValue In = N->getOperand(i);
15549
15550     if (In.isUndef())
15551       Opnds.push_back(DAG.getUNDEF(SrcVT));
15552     else
15553       Opnds.push_back(In.getOperand(0));
15554   }
15555   SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
15556   AddToWorklist(BV.getNode());
15557
15558   return DAG.getNode(Opcode, DL, VT, BV);
15559 }
15560
15561 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
15562                                            ArrayRef<int> VectorMask,
15563                                            SDValue VecIn1, SDValue VecIn2,
15564                                            unsigned LeftIdx) {
15565   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15566   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
15567
15568   EVT VT = N->getValueType(0);
15569   EVT InVT1 = VecIn1.getValueType();
15570   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
15571
15572   unsigned Vec2Offset = 0;
15573   unsigned NumElems = VT.getVectorNumElements();
15574   unsigned ShuffleNumElems = NumElems;
15575
15576   // In case both the input vectors are extracted from same base
15577   // vector we do not need extra addend (Vec2Offset) while
15578   // computing shuffle mask.
15579   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15580       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15581       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
15582     Vec2Offset = InVT1.getVectorNumElements();
15583
15584   // We can't generate a shuffle node with mismatched input and output types.
15585   // Try to make the types match the type of the output.
15586   if (InVT1 != VT || InVT2 != VT) {
15587     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
15588       // If the output vector length is a multiple of both input lengths,
15589       // we can concatenate them and pad the rest with undefs.
15590       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
15591       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
15592       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
15593       ConcatOps[0] = VecIn1;
15594       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
15595       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15596       VecIn2 = SDValue();
15597     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
15598       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
15599         return SDValue();
15600
15601       if (!VecIn2.getNode()) {
15602         // If we only have one input vector, and it's twice the size of the
15603         // output, split it in two.
15604         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
15605                              DAG.getConstant(NumElems, DL, IdxTy));
15606         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
15607         // Since we now have shorter input vectors, adjust the offset of the
15608         // second vector's start.
15609         Vec2Offset = NumElems;
15610       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
15611         // VecIn1 is wider than the output, and we have another, possibly
15612         // smaller input. Pad the smaller input with undefs, shuffle at the
15613         // input vector width, and extract the output.
15614         // The shuffle type is different than VT, so check legality again.
15615         if (LegalOperations &&
15616             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
15617           return SDValue();
15618
15619         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
15620         // lower it back into a BUILD_VECTOR. So if the inserted type is
15621         // illegal, don't even try.
15622         if (InVT1 != InVT2) {
15623           if (!TLI.isTypeLegal(InVT2))
15624             return SDValue();
15625           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
15626                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
15627         }
15628         ShuffleNumElems = NumElems * 2;
15629       } else {
15630         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
15631         // than VecIn1. We can't handle this for now - this case will disappear
15632         // when we start sorting the vectors by type.
15633         return SDValue();
15634       }
15635     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
15636                InVT1.getSizeInBits() == VT.getSizeInBits()) {
15637       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
15638       ConcatOps[0] = VecIn2;
15639       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15640     } else {
15641       // TODO: Support cases where the length mismatch isn't exactly by a
15642       // factor of 2.
15643       // TODO: Move this check upwards, so that if we have bad type
15644       // mismatches, we don't create any DAG nodes.
15645       return SDValue();
15646     }
15647   }
15648
15649   // Initialize mask to undef.
15650   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
15651
15652   // Only need to run up to the number of elements actually used, not the
15653   // total number of elements in the shuffle - if we are shuffling a wider
15654   // vector, the high lanes should be set to undef.
15655   for (unsigned i = 0; i != NumElems; ++i) {
15656     if (VectorMask[i] <= 0)
15657       continue;
15658
15659     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
15660     if (VectorMask[i] == (int)LeftIdx) {
15661       Mask[i] = ExtIndex;
15662     } else if (VectorMask[i] == (int)LeftIdx + 1) {
15663       Mask[i] = Vec2Offset + ExtIndex;
15664     }
15665   }
15666
15667   // The type the input vectors may have changed above.
15668   InVT1 = VecIn1.getValueType();
15669
15670   // If we already have a VecIn2, it should have the same type as VecIn1.
15671   // If we don't, get an undef/zero vector of the appropriate type.
15672   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
15673   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
15674
15675   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
15676   if (ShuffleNumElems > NumElems)
15677     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
15678
15679   return Shuffle;
15680 }
15681
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
//
// The work is done in three phases:
//  1. Classify every operand of N as undef, zero, or an extract with an
//     in-range constant index from some input vector (anything else aborts).
//  2. Shuffle the input vectors pairwise into vectors of N's type, with each
//     element already in its final lane (see createBuildVecShuffle).
//  3. If more than one such shuffle is needed, merge them with a binary tree
//     of blend shuffles.
// Returns an empty SDValue if the transform does not apply.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder for the zero vector, so that real input vectors
  // are numbered starting from 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands keep their initial mask value of -1.
    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The list of input vectors is expected to be tiny (usually 1 or 2
    // entries), so using a map back from SDValues to numbers isn't worth it.
    unsigned Idx = std::distance(
        VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
    if (Idx == VecIn.size())
      VecIn.push_back(ExtractedFromVec);

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (VecIn always contains the placeholder, hence the comparison with 2.)
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
    // Extraction index of each element; entries for undef/zero lanes stay 0
    // and are never read below.
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // NOTE(review): if PowerOf2Ceil(x) returns x itself when x is already a
    // power of two, then a power-of-two MaxIndex fails the
    // `MaxIndex < NearestPow2` test below and no split happens - confirm that
    // this is intended.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      // Only the low NearestPow2 elements of the input are accessed. Replace
      // the input by the two halves of that range.
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      if (TLI.isTypeLegal(SplitVT)) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(SplitSize, DL, IdxTy));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getConstant(0, DL, IdxTy));
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);

        // Re-number the elements: input 1 is the low half, input 2 the high
        // half.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // TODO: We want to sort the vectors by descending length, so that adjacent
  // pairs have similar length, and the longer vector is always first in the
  // pair.

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  // VecIn[0] is the placeholder, so the pairs are (VecIn[1], VecIn[2]),
  // (VecIn[3], VecIn[4]), ...; the last pair may lack its right-hand vector.
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    // If any pair cannot be shuffled, abandon the whole transform.
    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // Inputs 2k+1 and 2k+2 were merged into Shuffles[k]; the zero vector is the
  // last entry of Shuffles. An undef lane (-1) stays -1, since
  // (-1 - 1) / 2 == -1.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  // Each iteration blends neighbouring pairs, writing the results to the
  // front of Shuffles and halving the number of live entries.
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    // An odd count can only occur after halving, so index CurSize is still
    // inside the list; pad with undef to get a complete final pair.
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      // Every element is already in its final lane, so the blend takes lane i
      // from whichever side currently holds it, and the element is then
      // re-tagged as living in the blended vector number In.
      for (unsigned i = 0; i != NumElems; ++i) {
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
15890
15891 // Try to turn a build vector of zero extends of extract vector elts into a
15892 // a vector zero extend and possibly an extract subvector.
15893 // TODO: Support sign extend or any extend?
15894 // TODO: Allow undef elements?
15895 // TODO: Don't require the extracts to start at element 0.
15896 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
15897   if (LegalOperations)
15898     return SDValue();
15899
15900   EVT VT = N->getValueType(0);
15901
15902   SDValue Op0 = N->getOperand(0);
15903   auto checkElem = [&](SDValue Op) -> int64_t {
15904     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
15905         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15906         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
15907       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
15908         return C->getZExtValue();
15909     return -1;
15910   };
15911
15912   // Make sure the first element matches
15913   // (zext (extract_vector_elt X, C))
15914   int64_t Offset = checkElem(Op0);
15915   if (Offset < 0)
15916     return SDValue();
15917
15918   unsigned NumElems = N->getNumOperands();
15919   SDValue In = Op0.getOperand(0).getOperand(0);
15920   EVT InSVT = In.getValueType().getScalarType();
15921   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
15922
15923   // Don't create an illegal input type after type legalization.
15924   if (LegalTypes && !TLI.isTypeLegal(InVT))
15925     return SDValue();
15926
15927   // Ensure all the elements come from the same vector and are adjacent.
15928   for (unsigned i = 1; i != NumElems; ++i) {
15929     if ((Offset + i) != checkElem(N->getOperand(i)))
15930       return SDValue();
15931   }
15932
15933   SDLoc DL(N);
15934   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
15935                    Op0.getOperand(0).getOperand(1));
15936   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
15937 }
15938
15939 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
15940   EVT VT = N->getValueType(0);
15941
15942   // A vector built entirely of undefs is undef.
15943   if (ISD::allOperandsUndef(N))
15944     return DAG.getUNDEF(VT);
15945
15946   // If this is a splat of a bitcast from another vector, change to a
15947   // concat_vector.
15948   // For example:
15949   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
15950   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
15951   //
15952   // If X is a build_vector itself, the concat can become a larger build_vector.
15953   // TODO: Maybe this is useful for non-splat too?
15954   if (!LegalOperations) {
15955     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
15956       Splat = peekThroughBitcast(Splat);
15957       EVT SrcVT = Splat.getValueType();
15958       if (SrcVT.isVector()) {
15959         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
15960         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
15961                                      SrcVT.getVectorElementType(), NumElts);
15962         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
15963           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
15964           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
15965                                        NewVT, Ops);
15966           return DAG.getBitcast(VT, Concat);
15967         }
15968       }
15969     }
15970   }
15971
15972   // Check if we can express BUILD VECTOR via subvector extract.
15973   if (!LegalTypes && (N->getNumOperands() > 1)) {
15974     SDValue Op0 = N->getOperand(0);
15975     auto checkElem = [&](SDValue Op) -> uint64_t {
15976       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
15977           (Op0.getOperand(0) == Op.getOperand(0)))
15978         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
15979           return CNode->getZExtValue();
15980       return -1;
15981     };
15982
15983     int Offset = checkElem(Op0);
15984     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
15985       if (Offset + i != checkElem(N->getOperand(i))) {
15986         Offset = -1;
15987         break;
15988       }
15989     }
15990
15991     if ((Offset == 0) &&
15992         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
15993       return Op0.getOperand(0);
15994     if ((Offset != -1) &&
15995         ((Offset % N->getValueType(0).getVectorNumElements()) ==
15996          0)) // IDX must be multiple of output size.
15997       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
15998                          Op0.getOperand(0), Op0.getOperand(1));
15999   }
16000
16001   if (SDValue V = convertBuildVecZextToZext(N))
16002     return V;
16003
16004   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16005     return V;
16006
16007   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
16008     return V;
16009
16010   if (SDValue V = reduceBuildVecToShuffle(N))
16011     return V;
16012
16013   return SDValue();
16014 }
16015
16016 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16017   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16018   EVT OpVT = N->getOperand(0).getValueType();
16019
16020   // If the operands are legal vectors, leave them alone.
16021   if (TLI.isTypeLegal(OpVT))
16022     return SDValue();
16023
16024   SDLoc DL(N);
16025   EVT VT = N->getValueType(0);
16026   SmallVector<SDValue, 8> Ops;
16027
16028   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16029   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16030
16031   // Keep track of what we encounter.
16032   bool AnyInteger = false;
16033   bool AnyFP = false;
16034   for (const SDValue &Op : N->ops()) {
16035     if (ISD::BITCAST == Op.getOpcode() &&
16036         !Op.getOperand(0).getValueType().isVector())
16037       Ops.push_back(Op.getOperand(0));
16038     else if (ISD::UNDEF == Op.getOpcode())
16039       Ops.push_back(ScalarUndef);
16040     else
16041       return SDValue();
16042
16043     // Note whether we encounter an integer or floating point scalar.
16044     // If it's neither, bail out, it could be something weird like x86mmx.
16045     EVT LastOpVT = Ops.back().getValueType();
16046     if (LastOpVT.isFloatingPoint())
16047       AnyFP = true;
16048     else if (LastOpVT.isInteger())
16049       AnyInteger = true;
16050     else
16051       return SDValue();
16052   }
16053
16054   // If any of the operands is a floating point scalar bitcast to a vector,
16055   // use floating point types throughout, and bitcast everything.
16056   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16057   if (AnyFP) {
16058     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16059     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16060     if (AnyInteger) {
16061       for (SDValue &Op : Ops) {
16062         if (Op.getValueType() == SVT)
16063           continue;
16064         if (Op.isUndef())
16065           Op = ScalarUndef;
16066         else
16067           Op = DAG.getBitcast(SVT, Op);
16068       }
16069     }
16070   }
16071
16072   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16073                                VT.getSizeInBits() / SVT.getSizeInBits());
16074   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16075 }
16076
16077 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16078 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16079 // most two distinct vectors the same size as the result, attempt to turn this
16080 // into a legal shuffle.
16081 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16082   EVT VT = N->getValueType(0);
16083   EVT OpVT = N->getOperand(0).getValueType();
16084   int NumElts = VT.getVectorNumElements();
16085   int NumOpElts = OpVT.getVectorNumElements();
16086
16087   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16088   SmallVector<int, 8> Mask;
16089
16090   for (SDValue Op : N->ops()) {
16091     // Peek through any bitcast.
16092     Op = peekThroughBitcast(Op);
16093
16094     // UNDEF nodes convert to UNDEF shuffle mask values.
16095     if (Op.isUndef()) {
16096       Mask.append((unsigned)NumOpElts, -1);
16097       continue;
16098     }
16099
16100     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16101       return SDValue();
16102
16103     // What vector are we extracting the subvector from and at what index?
16104     SDValue ExtVec = Op.getOperand(0);
16105
16106     // We want the EVT of the original extraction to correctly scale the
16107     // extraction index.
16108     EVT ExtVT = ExtVec.getValueType();
16109
16110     // Peek through any bitcast.
16111     ExtVec = peekThroughBitcast(ExtVec);
16112
16113     // UNDEF nodes convert to UNDEF shuffle mask values.
16114     if (ExtVec.isUndef()) {
16115       Mask.append((unsigned)NumOpElts, -1);
16116       continue;
16117     }
16118
16119     if (!isa<ConstantSDNode>(Op.getOperand(1)))
16120       return SDValue();
16121     int ExtIdx = Op.getConstantOperandVal(1);
16122
16123     // Ensure that we are extracting a subvector from a vector the same
16124     // size as the result.
16125     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16126       return SDValue();
16127
16128     // Scale the subvector index to account for any bitcast.
16129     int NumExtElts = ExtVT.getVectorNumElements();
16130     if (0 == (NumExtElts % NumElts))
16131       ExtIdx /= (NumExtElts / NumElts);
16132     else if (0 == (NumElts % NumExtElts))
16133       ExtIdx *= (NumElts / NumExtElts);
16134     else
16135       return SDValue();
16136
16137     // At most we can reference 2 inputs in the final shuffle.
16138     if (SV0.isUndef() || SV0 == ExtVec) {
16139       SV0 = ExtVec;
16140       for (int i = 0; i != NumOpElts; ++i)
16141         Mask.push_back(i + ExtIdx);
16142     } else if (SV1.isUndef() || SV1 == ExtVec) {
16143       SV1 = ExtVec;
16144       for (int i = 0; i != NumOpElts; ++i)
16145         Mask.push_back(i + ExtIdx + NumElts);
16146     } else {
16147       return SDValue();
16148     }
16149   }
16150
16151   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16152     return SDValue();
16153
16154   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16155                               DAG.getBitcast(VT, SV1), Mask);
16156 }
16157
16158 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16159   // If we only have one input vector, we don't need to do any concatenation.
16160   if (N->getNumOperands() == 1)
16161     return N->getOperand(0);
16162
16163   // Check if all of the operands are undefs.
16164   EVT VT = N->getValueType(0);
16165   if (ISD::allOperandsUndef(N))
16166     return DAG.getUNDEF(VT);
16167
16168   // Optimize concat_vectors where all but the first of the vectors are undef.
16169   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16170         return Op.isUndef();
16171       })) {
16172     SDValue In = N->getOperand(0);
16173     assert(In.getValueType().isVector() && "Must concat vectors");
16174
16175     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
16176     if (In->getOpcode() == ISD::BITCAST &&
16177         !In->getOperand(0).getValueType().isVector()) {
16178       SDValue Scalar = In->getOperand(0);
16179
16180       // If the bitcast type isn't legal, it might be a trunc of a legal type;
16181       // look through the trunc so we can still do the transform:
16182       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16183       if (Scalar->getOpcode() == ISD::TRUNCATE &&
16184           !TLI.isTypeLegal(Scalar.getValueType()) &&
16185           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16186         Scalar = Scalar->getOperand(0);
16187
16188       EVT SclTy = Scalar->getValueType(0);
16189
16190       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
16191         return SDValue();
16192
16193       // Bail out if the vector size is not a multiple of the scalar size.
16194       if (VT.getSizeInBits() % SclTy.getSizeInBits())
16195         return SDValue();
16196
16197       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
16198       if (VNTNumElms < 2)
16199         return SDValue();
16200
16201       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
16202       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
16203         return SDValue();
16204
16205       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
16206       return DAG.getBitcast(VT, Res);
16207     }
16208   }
16209
16210   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
16211   // We have already tested above for an UNDEF only concatenation.
16212   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
16213   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
16214   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
16215     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
16216   };
16217   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
16218     SmallVector<SDValue, 8> Opnds;
16219     EVT SVT = VT.getScalarType();
16220
16221     EVT MinVT = SVT;
16222     if (!SVT.isFloatingPoint()) {
16223       // If BUILD_VECTOR are from built from integer, they may have different
16224       // operand types. Get the smallest type and truncate all operands to it.
16225       bool FoundMinVT = false;
16226       for (const SDValue &Op : N->ops())
16227         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16228           EVT OpSVT = Op.getOperand(0).getValueType();
16229           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
16230           FoundMinVT = true;
16231         }
16232       assert(FoundMinVT && "Concat vector type mismatch");
16233     }
16234
16235     for (const SDValue &Op : N->ops()) {
16236       EVT OpVT = Op.getValueType();
16237       unsigned NumElts = OpVT.getVectorNumElements();
16238
16239       if (ISD::UNDEF == Op.getOpcode())
16240         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
16241
16242       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16243         if (SVT.isFloatingPoint()) {
16244           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
16245           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
16246         } else {
16247           for (unsigned i = 0; i != NumElts; ++i)
16248             Opnds.push_back(
16249                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
16250         }
16251       }
16252     }
16253
16254     assert(VT.getVectorNumElements() == Opnds.size() &&
16255            "Concat vector type mismatch");
16256     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
16257   }
16258
16259   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
16260   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
16261     return V;
16262
16263   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
16264   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16265     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
16266       return V;
16267
16268   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
16269   // nodes often generate nop CONCAT_VECTOR nodes.
16270   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
16271   // place the incoming vectors at the exact same location.
16272   SDValue SingleSource = SDValue();
16273   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
16274
16275   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16276     SDValue Op = N->getOperand(i);
16277
16278     if (Op.isUndef())
16279       continue;
16280
16281     // Check if this is the identity extract:
16282     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16283       return SDValue();
16284
16285     // Find the single incoming vector for the extract_subvector.
16286     if (SingleSource.getNode()) {
16287       if (Op.getOperand(0) != SingleSource)
16288         return SDValue();
16289     } else {
16290       SingleSource = Op.getOperand(0);
16291
16292       // Check the source type is the same as the type of the result.
16293       // If not, this concat may extend the vector, so we can not
16294       // optimize it away.
16295       if (SingleSource.getValueType() != N->getValueType(0))
16296         return SDValue();
16297     }
16298
16299     unsigned IdentityIndex = i * PartNumElem;
16300     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16301     // The extract index must be constant.
16302     if (!CS)
16303       return SDValue();
16304
16305     // Check that we are reading from the identity index.
16306     if (CS->getZExtValue() != IdentityIndex)
16307       return SDValue();
16308   }
16309
16310   if (SingleSource.getNode())
16311     return SingleSource;
16312
16313   return SDValue();
16314 }
16315
16316 /// If we are extracting a subvector produced by a wide binary operator with at
16317 /// at least one operand that was the result of a vector concatenation, then try
16318 /// to use the narrow vector operands directly to avoid the concatenation and
16319 /// extraction.
16320 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16321   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16322   // some of these bailouts with other transforms.
16323
16324   // The extract index must be a constant, so we can map it to a concat operand.
16325   auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16326   if (!ExtractIndex)
16327     return SDValue();
16328
16329   // Only handle the case where we are doubling and then halving. A larger ratio
16330   // may require more than two narrow binops to replace the wide binop.
16331   EVT VT = Extract->getValueType(0);
16332   unsigned NumElems = VT.getVectorNumElements();
16333   assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
16334          "Extract index is not a multiple of the vector length.");
16335   if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
16336     return SDValue();
16337
16338   // We are looking for an optionally bitcasted wide vector binary operator
16339   // feeding an extract subvector.
16340   SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
16341
16342   // TODO: The motivating case for this transform is an x86 AVX1 target. That
16343   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16344   // flavors, but no other 256-bit integer support. This could be extended to
16345   // handle any binop, but that may require fixing/adding other folds to avoid
16346   // codegen regressions.
16347   unsigned BOpcode = BinOp.getOpcode();
16348   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16349     return SDValue();
16350
16351   // The binop must be a vector type, so we can chop it in half.
16352   EVT WideBVT = BinOp.getValueType();
16353   if (!WideBVT.isVector())
16354     return SDValue();
16355
16356   // Bail out if the target does not support a narrower version of the binop.
16357   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16358                                    WideBVT.getVectorNumElements() / 2);
16359   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16360   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16361     return SDValue();
16362
16363   // Peek through bitcasts of the binary operator operands if needed.
16364   SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
16365   SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
16366
16367   // We need at least one concatenation operation of a binop operand to make
16368   // this transform worthwhile. The concat must double the input vector sizes.
16369   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16370   bool ConcatL =
16371       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
16372   bool ConcatR =
16373       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
16374   if (!ConcatL && !ConcatR)
16375     return SDValue();
16376
16377   // If one of the binop operands was not the result of a concat, we must
16378   // extract a half-sized operand for our new narrow binop. We can't just reuse
16379   // the original extract index operand because we may have bitcasted.
16380   unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
16381   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16382   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16383   SDLoc DL(Extract);
16384
16385   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
16386   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
16387   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
16388   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
16389                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16390                                     BinOp.getOperand(0),
16391                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16392
16393   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
16394                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16395                                     BinOp.getOperand(1),
16396                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16397
16398   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
16399   return DAG.getBitcast(VT, NarrowBinOp);
16400 }
16401
16402 /// If we are extracting a subvector from a wide vector load, convert to a
16403 /// narrow load to eliminate the extraction:
16404 /// (extract_subvector (load wide vector)) --> (load narrow vector)
16405 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
16406   // TODO: Add support for big-endian. The offset calculation must be adjusted.
16407   if (DAG.getDataLayout().isBigEndian())
16408     return SDValue();
16409
16410   // TODO: The one-use check is overly conservative. Check the cost of the
16411   // extract instead or remove that condition entirely.
16412   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
16413   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16414   if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
16415       !ExtIdx)
16416     return SDValue();
16417
16418   // The narrow load will be offset from the base address of the old load if
16419   // we are extracting from something besides index 0 (little-endian).
16420   EVT VT = Extract->getValueType(0);
16421   SDLoc DL(Extract);
16422   SDValue BaseAddr = Ld->getOperand(1);
16423   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
16424
16425   // TODO: Use "BaseIndexOffset" to make this more effective.
16426   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
16427   MachineFunction &MF = DAG.getMachineFunction();
16428   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
16429                                                    VT.getStoreSize());
16430   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
16431   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
16432   return NewLd;
16433 }
16434
16435 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16436   EVT NVT = N->getValueType(0);
16437   SDValue V = N->getOperand(0);
16438
16439   // Extract from UNDEF is UNDEF.
16440   if (V.isUndef())
16441     return DAG.getUNDEF(NVT);
16442
16443   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16444     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16445       return NarrowLoad;
16446
16447   // Combine:
16448   //    (extract_subvec (concat V1, V2, ...), i)
16449   // Into:
16450   //    Vi if possible
16451   // Only operand 0 is checked as 'concat' assumes all inputs of the same
16452   // type.
16453   if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16454       isa<ConstantSDNode>(N->getOperand(1)) &&
16455       V->getOperand(0).getValueType() == NVT) {
16456     unsigned Idx = N->getConstantOperandVal(1);
16457     unsigned NumElems = NVT.getVectorNumElements();
16458     assert((Idx % NumElems) == 0 &&
16459            "IDX in concat is not a multiple of the result vector length.");
16460     return V->getOperand(Idx / NumElems);
16461   }
16462
16463   // Skip bitcasting
16464   V = peekThroughBitcast(V);
16465
16466   // If the input is a build vector. Try to make a smaller build vector.
16467   if (V->getOpcode() == ISD::BUILD_VECTOR) {
16468     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16469       EVT InVT = V->getValueType(0);
16470       unsigned ExtractSize = NVT.getSizeInBits();
16471       unsigned EltSize = InVT.getScalarSizeInBits();
16472       // Only do this if we won't split any elements.
16473       if (ExtractSize % EltSize == 0) {
16474         unsigned NumElems = ExtractSize / EltSize;
16475         EVT EltVT = InVT.getVectorElementType();
16476         EVT ExtractVT = NumElems == 1 ? EltVT :
16477           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16478         if ((Level < AfterLegalizeDAG ||
16479              (NumElems == 1 ||
16480               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16481             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16482           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16483                             EltSize;
16484           if (NumElems == 1) {
16485             SDValue Src = V->getOperand(IdxVal);
16486             if (EltVT != Src.getValueType())
16487               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16488
16489             return DAG.getBitcast(NVT, Src);
16490           }
16491
16492           // Extract the pieces from the original build_vector.
16493           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16494                                             makeArrayRef(V->op_begin() + IdxVal,
16495                                                          NumElems));
16496           return DAG.getBitcast(NVT, BuildVec);
16497         }
16498       }
16499     }
16500   }
16501
16502   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16503     // Handle only simple case where vector being inserted and vector
16504     // being extracted are of same size.
16505     EVT SmallVT = V->getOperand(1).getValueType();
16506     if (!NVT.bitsEq(SmallVT))
16507       return SDValue();
16508
16509     // Only handle cases where both indexes are constants.
16510     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16511     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16512
16513     if (InsIdx && ExtIdx) {
16514       // Combine:
16515       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16516       // Into:
16517       //    indices are equal or bit offsets are equal => V1
16518       //    otherwise => (extract_subvec V1, ExtIdx)
16519       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16520           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16521         return DAG.getBitcast(NVT, V->getOperand(1));
16522       return DAG.getNode(
16523           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16524           DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16525           N->getOperand(1));
16526     }
16527   }
16528
16529   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16530     return NarrowBOp;
16531
16532   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16533     return SDValue(N, 0);
16534
16535   return SDValue();
16536 }
16537
16538 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
16539 // or turn a shuffle of a single concat into simpler shuffle then concat.
16540 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
16541   EVT VT = N->getValueType(0);
16542   unsigned NumElts = VT.getVectorNumElements();
16543
16544   SDValue N0 = N->getOperand(0);
16545   SDValue N1 = N->getOperand(1);
16546   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16547
16548   SmallVector<SDValue, 4> Ops;
16549   EVT ConcatVT = N0.getOperand(0).getValueType();
16550   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
16551   unsigned NumConcats = NumElts / NumElemsPerConcat;
16552
16553   // Special case: shuffle(concat(A,B)) can be more efficiently represented
16554   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
16555   // half vector elements.
16556   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
16557       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
16558                   SVN->getMask().end(), [](int i) { return i == -1; })) {
16559     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
16560                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
16561     N1 = DAG.getUNDEF(ConcatVT);
16562     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
16563   }
16564
16565   // Look at every vector that's inserted. We're looking for exact
16566   // subvector-sized copies from a concatenated vector
16567   for (unsigned I = 0; I != NumConcats; ++I) {
16568     // Make sure we're dealing with a copy.
16569     unsigned Begin = I * NumElemsPerConcat;
16570     bool AllUndef = true, NoUndef = true;
16571     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
16572       if (SVN->getMaskElt(J) >= 0)
16573         AllUndef = false;
16574       else
16575         NoUndef = false;
16576     }
16577
16578     if (NoUndef) {
16579       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
16580         return SDValue();
16581
16582       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
16583         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
16584           return SDValue();
16585
16586       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
16587       if (FirstElt < N0.getNumOperands())
16588         Ops.push_back(N0.getOperand(FirstElt));
16589       else
16590         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
16591
16592     } else if (AllUndef) {
16593       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
16594     } else { // Mixed with general masks and undefs, can't do optimization.
16595       return SDValue();
16596     }
16597   }
16598
16599   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16600 }
16601
16602 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16603 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16604 //
16605 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
16606 // a simplification in some sense, but it isn't appropriate in general: some
16607 // BUILD_VECTORs are substantially cheaper than others. The general case
16608 // of a BUILD_VECTOR requires inserting each element individually (or
16609 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
16610 // all constants is a single constant pool load.  A BUILD_VECTOR where each
16611 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
16612 // are undef lowers to a small number of element insertions.
16613 //
16614 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
16615 // We don't fold shuffles where one side is a non-zero constant, and we don't
16616 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
16617 // non-constant operands. This seems to work out reasonably well in practice.
16618 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
16619                                        SelectionDAG &DAG,
16620                                        const TargetLowering &TLI) {
16621   EVT VT = SVN->getValueType(0);
16622   unsigned NumElts = VT.getVectorNumElements();
16623   SDValue N0 = SVN->getOperand(0);
16624   SDValue N1 = SVN->getOperand(1);
16625
16626   if (!N0->hasOneUse() || !N1->hasOneUse())
16627     return SDValue();
16628
16629   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
16630   // discussed above.
16631   if (!N1.isUndef()) {
16632     bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
16633     bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
16634     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
16635       return SDValue();
16636     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
16637       return SDValue();
16638   }
16639
16640   // If both inputs are splats of the same value then we can safely merge this
16641   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
16642   bool IsSplat = false;
16643   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
16644   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
16645   if (BV0 && BV1)
16646     if (SDValue Splat0 = BV0->getSplatValue())
16647       IsSplat = (Splat0 == BV1->getSplatValue());
16648
16649   SmallVector<SDValue, 8> Ops;
16650   SmallSet<SDValue, 16> DuplicateOps;
16651   for (int M : SVN->getMask()) {
16652     SDValue Op = DAG.getUNDEF(VT.getScalarType());
16653     if (M >= 0) {
16654       int Idx = M < (int)NumElts ? M : M - NumElts;
16655       SDValue &S = (M < (int)NumElts ? N0 : N1);
16656       if (S.getOpcode() == ISD::BUILD_VECTOR) {
16657         Op = S.getOperand(Idx);
16658       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16659         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
16660         Op = S.getOperand(0);
16661       } else {
16662         // Operand can't be combined - bail out.
16663         return SDValue();
16664       }
16665     }
16666
16667     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
16668     // generating a splat; semantically, this is fine, but it's likely to
16669     // generate low-quality code if the target can't reconstruct an appropriate
16670     // shuffle.
16671     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
16672       if (!IsSplat && !DuplicateOps.insert(Op).second)
16673         return SDValue();
16674
16675     Ops.push_back(Op);
16676   }
16677
16678   // BUILD_VECTOR requires all inputs to be of the same type, find the
16679   // maximum type and extend them all.
16680   EVT SVT = VT.getScalarType();
16681   if (SVT.isInteger())
16682     for (SDValue &Op : Ops)
16683       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
16684   if (SVT != VT.getScalarType())
16685     for (SDValue &Op : Ops)
16686       Op = TLI.isZExtFree(Op.getValueType(), SVT)
16687                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
16688                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
16689   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
16690 }
16691
16692 // Match shuffles that can be converted to any_vector_extend_in_reg.
16693 // This is often generated during legalization.
16694 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
16695 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
16696 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
16697                                             SelectionDAG &DAG,
16698                                             const TargetLowering &TLI,
16699                                             bool LegalOperations,
16700                                             bool LegalTypes) {
16701   EVT VT = SVN->getValueType(0);
16702   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16703
16704   // TODO Add support for big-endian when we have a test case.
16705   if (!VT.isInteger() || IsBigEndian)
16706     return SDValue();
16707
16708   unsigned NumElts = VT.getVectorNumElements();
16709   unsigned EltSizeInBits = VT.getScalarSizeInBits();
16710   ArrayRef<int> Mask = SVN->getMask();
16711   SDValue N0 = SVN->getOperand(0);
16712
16713   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
16714   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
16715     for (unsigned i = 0; i != NumElts; ++i) {
16716       if (Mask[i] < 0)
16717         continue;
16718       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
16719         continue;
16720       return false;
16721     }
16722     return true;
16723   };
16724
16725   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
16726   // power-of-2 extensions as they are the most likely.
16727   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
16728     // Check for non power of 2 vector sizes
16729     if (NumElts % Scale != 0)
16730       continue;
16731     if (!isAnyExtend(Scale))
16732       continue;
16733
16734     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
16735     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
16736     if (!LegalTypes || TLI.isTypeLegal(OutVT))
16737       if (!LegalOperations ||
16738           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
16739         return DAG.getBitcast(VT,
16740                             DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
16741   }
16742
16743   return SDValue();
16744 }
16745
16746 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
16747 // each source element of a large type into the lowest elements of a smaller
16748 // destination type. This is often generated during legalization.
16749 // If the source node itself was a '*_extend_vector_inreg' node then we should
16750 // then be able to remove it.
16751 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
16752                                         SelectionDAG &DAG) {
16753   EVT VT = SVN->getValueType(0);
16754   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16755
16756   // TODO Add support for big-endian when we have a test case.
16757   if (!VT.isInteger() || IsBigEndian)
16758     return SDValue();
16759
16760   SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
16761
16762   unsigned Opcode = N0.getOpcode();
16763   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
16764       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
16765       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
16766     return SDValue();
16767
16768   SDValue N00 = N0.getOperand(0);
16769   ArrayRef<int> Mask = SVN->getMask();
16770   unsigned NumElts = VT.getVectorNumElements();
16771   unsigned EltSizeInBits = VT.getScalarSizeInBits();
16772   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
16773   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
16774
16775   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
16776     return SDValue();
16777   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
16778
16779   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
16780   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
16781   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
16782   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
16783     for (unsigned i = 0; i != NumElts; ++i) {
16784       if (Mask[i] < 0)
16785         continue;
16786       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
16787         continue;
16788       return false;
16789     }
16790     return true;
16791   };
16792
16793   // At the moment we just handle the case where we've truncated back to the
16794   // same size as before the extension.
16795   // TODO: handle more extension/truncation cases as cases arise.
16796   if (EltSizeInBits != ExtSrcSizeInBits)
16797     return SDValue();
16798
16799   // We can remove *extend_vector_inreg only if the truncation happens at
16800   // the same scale as the extension.
16801   if (isTruncate(ExtScale))
16802     return DAG.getBitcast(VT, N00);
16803
16804   return SDValue();
16805 }
16806
16807 // Combine shuffles of splat-shuffles of the form:
16808 // shuffle (shuffle V, undef, splat-mask), undef, M
16809 // If splat-mask contains undef elements, we need to be careful about
16810 // introducing undef's in the folded mask which are not the result of composing
16811 // the masks of the shuffles.
16812 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
16813                                      ShuffleVectorSDNode *Splat,
16814                                      SelectionDAG &DAG) {
16815   ArrayRef<int> SplatMask = Splat->getMask();
16816   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
16817
16818   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
16819   // every undef mask element in the splat-shuffle has a corresponding undef
16820   // element in the user-shuffle's mask or if the composition of mask elements
16821   // would result in undef.
16822   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
16823   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
16824   //   In this case it is not legal to simplify to the splat-shuffle because we
16825   //   may be exposing the users of the shuffle an undef element at index 1
16826   //   which was not there before the combine.
16827   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
16828   //   In this case the composition of masks yields SplatMask, so it's ok to
16829   //   simplify to the splat-shuffle.
16830   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
16831   //   In this case the composed mask includes all undef elements of SplatMask
16832   //   and in addition sets element zero to undef. It is safe to simplify to
16833   //   the splat-shuffle.
16834   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
16835                                        ArrayRef<int> SplatMask) {
16836     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
16837       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
16838           SplatMask[UserMask[i]] != -1)
16839         return false;
16840     return true;
16841   };
16842   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
16843     return SDValue(Splat, 0);
16844
16845   // Create a new shuffle with a mask that is composed of the two shuffles'
16846   // masks.
16847   SmallVector<int, 32> NewMask;
16848   for (int Idx : UserMask)
16849     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
16850
16851   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
16852                               Splat->getOperand(0), Splat->getOperand(1),
16853                               NewMask);
16854 }
16855
16856 /// If the shuffle mask is taking exactly one element from the first vector
16857 /// operand and passing through all other elements from the second vector
16858 /// operand, return the index of the mask element that is choosing an element
16859 /// from the first operand. Otherwise, return -1.
16860 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
16861   int MaskSize = Mask.size();
16862   int EltFromOp0 = -1;
16863   // TODO: This does not match if there are undef elements in the shuffle mask.
16864   // Should we ignore undefs in the shuffle mask instead? The trade-off is
16865   // removing an instruction (a shuffle), but losing the knowledge that some
16866   // vector lanes are not needed.
16867   for (int i = 0; i != MaskSize; ++i) {
16868     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
16869       // We're looking for a shuffle of exactly one element from operand 0.
16870       if (EltFromOp0 != -1)
16871         return -1;
16872       EltFromOp0 = i;
16873     } else if (Mask[i] != i + MaskSize) {
16874       // Nothing from operand 1 can change lanes.
16875       return -1;
16876     }
16877   }
16878   return EltFromOp0;
16879 }
16880
16881 /// If a shuffle inserts exactly one element from a source vector operand into
16882 /// another vector operand and we can access the specified element as a scalar,
16883 /// then we can eliminate the shuffle.
16884 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
16885                                       SelectionDAG &DAG) {
16886   // First, check if we are taking one element of a vector and shuffling that
16887   // element into another vector.
16888   ArrayRef<int> Mask = Shuf->getMask();
16889   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
16890   SDValue Op0 = Shuf->getOperand(0);
16891   SDValue Op1 = Shuf->getOperand(1);
16892   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
16893   if (ShufOp0Index == -1) {
16894     // Commute mask and check again.
16895     ShuffleVectorSDNode::commuteMask(CommutedMask);
16896     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
16897     if (ShufOp0Index == -1)
16898       return SDValue();
16899     // Commute operands to match the commuted shuffle mask.
16900     std::swap(Op0, Op1);
16901     Mask = CommutedMask;
16902   }
16903
16904   // The shuffle inserts exactly one element from operand 0 into operand 1.
16905   // Now see if we can access that element as a scalar via a real insert element
16906   // instruction.
16907   // TODO: We can try harder to locate the element as a scalar. Examples: it
16908   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
16909   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
16910          "Shuffle mask value must be from operand 0");
16911   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
16912     return SDValue();
16913
16914   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
16915   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
16916     return SDValue();
16917
16918   // There's an existing insertelement with constant insertion index, so we
16919   // don't need to check the legality/profitability of a replacement operation
16920   // that differs at most in the constant value. The target should be able to
16921   // lower any of those in a similar way. If not, legalization will expand this
16922   // to a scalar-to-vector plus shuffle.
16923   //
16924   // Note that the shuffle may move the scalar from the position that the insert
16925   // element used. Therefore, our new insert element occurs at the shuffle's
16926   // mask index value, not the insert's index value.
16927   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
16928   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
16929                                         Op0.getOperand(2).getValueType());
16930   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
16931                      Op1, Op0.getOperand(1), NewInsIndex);
16932 }
16933
16934 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // Combine entry point for a VECTOR_SHUFFLE node. The canonicalizations
        // and folds below are tried in order and the first one that applies
        // returns its replacement value. SDValue(N, 0) is returned when
        // SimplifyDemandedVectorElts reports a change, and an empty SDValue is
        // returned when nothing applies. Several folds are gated on the
        // current combine Level and on VT being a legal type.
16935   EVT VT = N->getValueType(0);
16936   unsigned NumElts = VT.getVectorNumElements();
16937
16938   SDValue N0 = N->getOperand(0);
16939   SDValue N1 = N->getOperand(1);
16940
16941   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
16942
16943   // Canonicalize shuffle undef, undef -> undef
16944   if (N0.isUndef() && N1.isUndef())
16945     return DAG.getUNDEF(VT);
16946
16947   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16948
16949   // Canonicalize shuffle v, v -> v, undef
16950   if (N0 == N1) {
16951     SmallVector<int, 8> NewMask;
16952     for (unsigned i = 0; i != NumElts; ++i) {
16953       int Idx = SVN->getMaskElt(i);
          // Indices >= NumElts select from the second operand; since both
          // operands are the same value, redirect them to the first operand.
16954       if (Idx >= (int)NumElts) Idx -= NumElts;
16955       NewMask.push_back(Idx);
16956     }
16957     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
16958   }
16959
16960   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
16961   if (N0.isUndef())
16962     return DAG.getCommutedVectorShuffle(*SVN);
16963
16964   // Remove references to rhs if it is undef
16965   if (N1.isUndef()) {
16966     bool Changed = false;
16967     SmallVector<int, 8> NewMask;
16968     for (unsigned i = 0; i != NumElts; ++i) {
16969       int Idx = SVN->getMaskElt(i);
16970       if (Idx >= (int)NumElts) {
16971         Idx = -1;
16972         Changed = true;
16973       }
16974       NewMask.push_back(Idx);
16975     }
          // Only build a new node if at least one mask element was rewritten.
16976     if (Changed)
16977       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
16978   }
16979
16980   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
16981     return InsElt;
16982
16983   // A shuffle of a single vector that is a splat can always be folded.
16984   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
16985     if (N1->isUndef() && N0Shuf->isSplat())
16986       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
16987
16988   // If it is a splat, check if the argument vector is another splat or a
16989   // build_vector.
        // The splat index being below NumElts means the splatted element is
        // taken from N0.
16990   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
16991     SDNode *V = N0.getNode();
16992
16993     // If this is a bit convert that changes the element type of the vector but
16994     // not the number of vector elements, look through it.  Be careful not to
16995     // look though conversions that change things like v4f32 to v2f64.
16996     if (V->getOpcode() == ISD::BITCAST) {
16997       SDValue ConvInput = V->getOperand(0);
16998       if (ConvInput.getValueType().isVector() &&
16999           ConvInput.getValueType().getVectorNumElements() == NumElts)
17000         V = ConvInput.getNode();
17001     }
17002
17003     if (V->getOpcode() == ISD::BUILD_VECTOR) {
17004       assert(V->getNumOperands() == NumElts &&
17005              "BUILD_VECTOR has wrong number of operands");
17006       SDValue Base;
17007       bool AllSame = true;
            // Base becomes the first non-undef operand, if there is one.
17008       for (unsigned i = 0; i != NumElts; ++i) {
17009         if (!V->getOperand(i).isUndef()) {
17010           Base = V->getOperand(i);
17011           break;
17012         }
17013       }
17014       // Splat of <u, u, u, u>, return <u, u, u, u>
17015       if (!Base.getNode())
17016         return N0;
            // Every operand (undef ones included) must equal Base for the
            // BUILD_VECTOR to count as already being the splat.
17017       for (unsigned i = 0; i != NumElts; ++i) {
17018         if (V->getOperand(i) != Base) {
17019           AllSame = false;
17020           break;
17021         }
17022       }
17023       // Splat of <x, x, x, x>, return <x, x, x, x>
17024       if (AllSame)
17025         return N0;
17026
17027       // Canonicalize any other splat as a build_vector.
17028       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17029       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17030       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17031
17032       // We may have jumped through bitcasts, so the type of the
17033       // BUILD_VECTOR may not match the type of the shuffle.
17034       if (V->getValueType(0) != VT)
17035         NewBV = DAG.getBitcast(VT, NewBV);
17036       return NewBV;
17037     }
17038   }
17039
17040   // Simplify source operands based on shuffle mask.
17041   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17042     return SDValue(N, 0);
17043
17044   // Match shuffles that can be converted to any_vector_extend_in_reg.
17045   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
17046     return V;
17047
17048   // Combine "truncate_vector_in_reg" style shuffles.
17049   if (SDValue V = combineTruncationShuffle(SVN, DAG))
17050     return V;
17051
        // Shuffle of CONCAT_VECTORS (with an undef or a matching CONCAT_VECTORS
        // second operand): hand off to partitionShuffleOfConcats. Only
        // attempted before vector-op legalization.
17052   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17053       Level < AfterLegalizeVectorOps &&
17054       (N1.isUndef() ||
17055       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17056        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17057     if (SDValue V = partitionShuffleOfConcats(N, DAG))
17058       return V;
17059   }
17060
17061   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17062   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17063   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17064     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17065       return Res;
17066
17067   // If this shuffle only has a single input that is a bitcasted shuffle,
17068   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17069   // back to their original types.
17070   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17071       N1.isUndef() && Level < AfterLegalizeVectorOps &&
17072       TLI.isTypeLegal(VT)) {
17073
17074     // Peek through the bitcast only if there is one user.
17075     SDValue BC0 = N0;
17076     while (BC0.getOpcode() == ISD::BITCAST) {
17077       if (!BC0.hasOneUse())
17078         break;
17079       BC0 = BC0.getOperand(0);
17080     }
17081
          // Re-express Mask over elements that are Scale times narrower: each
          // element M expands to the Scale consecutive indices Scale*M ..
          // Scale*M+Scale-1, and each undef element expands to Scale undefs.
17082     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17083       if (Scale == 1)
17084         return SmallVector<int, 8>(Mask.begin(), Mask.end());
17085
17086       SmallVector<int, 8> NewMask;
17087       for (int M : Mask)
17088         for (int s = 0; s != Scale; ++s)
17089           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17090       return NewMask;
17091     };
17092
17093     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17094       EVT SVT = VT.getScalarType();
17095       EVT InnerVT = BC0->getValueType(0);
17096       EVT InnerSVT = InnerVT.getScalarType();
17097
17098       // Determine which shuffle works with the smaller scalar type.
17099       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17100       EVT ScaleSVT = ScaleVT.getScalarType();
17101
            // Both scalar sizes must be whole multiples of the smaller one so
            // that the masks can be rescaled exactly.
17102       if (TLI.isTypeLegal(ScaleVT) &&
17103           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17104           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17105         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17106         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17107
17108         // Scale the shuffle masks to the smaller scalar type.
17109         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17110         SmallVector<int, 8> InnerMask =
17111             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17112         SmallVector<int, 8> OuterMask =
17113             ScaleShuffleMask(SVN->getMask(), OuterScale);
17114
17115         // Merge the shuffle masks.
              // Each defined outer index selects an entry of the inner mask.
17116         SmallVector<int, 8> NewMask;
17117         for (int M : OuterMask)
17118           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17119
17120         // Test for shuffle mask legality over both commutations.
17121         SDValue SV0 = BC0->getOperand(0);
17122         SDValue SV1 = BC0->getOperand(1);
17123         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17124         if (!LegalMask) {
17125           std::swap(SV0, SV1);
17126           ShuffleVectorSDNode::commuteMask(NewMask);
17127           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17128         }
17129
17130         if (LegalMask) {
17131           SV0 = DAG.getBitcast(ScaleVT, SV0);
17132           SV1 = DAG.getBitcast(ScaleVT, SV1);
17133           return DAG.getBitcast(
17134               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17135         }
17136       }
17137     }
17138   }
17139
17140   // Canonicalize shuffles according to rules:
17141   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17142   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17143   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17144   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17145       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17146       TLI.isTypeLegal(VT)) {
17147     // The incoming shuffle must be of the same type as the result of the
17148     // current shuffle.
17149     assert(N1->getOperand(0).getValueType() == VT &&
17150            "Shuffle types don't match");
17151
17152     SDValue SV0 = N1->getOperand(0);
17153     SDValue SV1 = N1->getOperand(1);
17154     bool HasSameOp0 = N0 == SV0;
17155     bool IsSV1Undef = SV1.isUndef();
17156     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
17157       // Commute the operands of this shuffle so that next rule
17158       // will trigger.
17159       return DAG.getCommutedVectorShuffle(*SVN);
17160   }
17161
17162   // Try to fold according to rules:
17163   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17164   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17165   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17166   // Don't try to fold shuffles with illegal type.
17167   // Only fold if this shuffle is the only user of the other shuffle.
17168   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
17169       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
17170     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
17171
17172     // Don't try to fold splats; they're likely to simplify somehow, or they
17173     // might be free.
17174     if (OtherSV->isSplat())
17175       return SDValue();
17176
17177     // The incoming shuffle must be of the same type as the result of the
17178     // current shuffle.
17179     assert(OtherSV->getOperand(0).getValueType() == VT &&
17180            "Shuffle types don't match");
17181
          // SV0/SV1 start out empty and are assigned, in order of first
          // appearance, the distinct source vectors the combined mask uses.
17182     SDValue SV0, SV1;
17183     SmallVector<int, 4> Mask;
17184     // Compute the combined shuffle mask for a shuffle with SV0 as the first
17185     // operand, and SV1 as the second operand.
17186     for (unsigned i = 0; i != NumElts; ++i) {
17187       int Idx = SVN->getMaskElt(i);
17188       if (Idx < 0) {
17189         // Propagate Undef.
17190         Mask.push_back(Idx);
17191         continue;
17192       }
17193
17194       SDValue CurrentVec;
17195       if (Idx < (int)NumElts) {
17196         // This shuffle index refers to the inner shuffle N0. Lookup the inner
17197         // shuffle mask to identify which vector is actually referenced.
17198         Idx = OtherSV->getMaskElt(Idx);
17199         if (Idx < 0) {
17200           // Propagate Undef.
17201           Mask.push_back(Idx);
17202           continue;
17203         }
17204
17205         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
17206                                            : OtherSV->getOperand(1);
17207       } else {
17208         // This shuffle index references an element within N1.
17209         CurrentVec = N1;
17210       }
17211
17212       // Simple case where 'CurrentVec' is UNDEF.
17213       if (CurrentVec.isUndef()) {
17214         Mask.push_back(-1);
17215         continue;
17216       }
17217
17218       // Canonicalize the shuffle index. We don't know yet if CurrentVec
17219       // will be the first or second operand of the combined shuffle.
17220       Idx = Idx % NumElts;
17221       if (!SV0.getNode() || SV0 == CurrentVec) {
17222         // Ok. CurrentVec is the left hand side.
17223         // Update the mask accordingly.
17224         SV0 = CurrentVec;
17225         Mask.push_back(Idx);
17226         continue;
17227       }
17228
17229       // Bail out if we cannot convert the shuffle pair into a single shuffle.
            // (This is the case when a third distinct source vector shows up.)
17230       if (SV1.getNode() && SV1 != CurrentVec)
17231         return SDValue();
17232
17233       // Ok. CurrentVec is the right hand side.
17234       // Update the mask accordingly.
17235       SV1 = CurrentVec;
17236       Mask.push_back(Idx + NumElts);
17237     }
17238
17239     // Check if all indices in Mask are Undef. In case, propagate Undef.
17240     bool isUndefMask = true;
17241     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
17242       isUndefMask &= Mask[i] < 0;
17243
17244     if (isUndefMask)
17245       return DAG.getUNDEF(VT);
17246
          // Any operand slot that was never assigned becomes undef.
17247     if (!SV0.getNode())
17248       SV0 = DAG.getUNDEF(VT);
17249     if (!SV1.getNode())
17250       SV1 = DAG.getUNDEF(VT);
17251
17252     // Avoid introducing shuffles with illegal mask.
          // If the mask is illegal as computed, retry with the operands in the
          // opposite order before giving up.
17253     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
17254       ShuffleVectorSDNode::commuteMask(Mask);
17255
17256       if (!TLI.isShuffleMaskLegal(Mask, VT))
17257         return SDValue();
17258
17259       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
17260       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
17261       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
17262       std::swap(SV0, SV1);
17263     }
17264
17265     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17266     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17267     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17268     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
17269   }
17270
17271   return SDValue();
17272 }
17273
17274 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17275   SDValue InVal = N->getOperand(0);
17276   EVT VT = N->getValueType(0);
17277
17278   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17279   // with a VECTOR_SHUFFLE and possible truncate.
17280   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17281     SDValue InVec = InVal->getOperand(0);
17282     SDValue EltNo = InVal->getOperand(1);
17283     auto InVecT = InVec.getValueType();
17284     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17285       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17286       int Elt = C0->getZExtValue();
17287       NewMask[0] = Elt;
17288       SDValue Val;
17289       // If we have an implict truncate do truncate here as long as it's legal.
17290       // if it's not legal, this should
17291       if (VT.getScalarType() != InVal.getValueType() &&
17292           InVal.getValueType().isScalarInteger() &&
17293           isTypeLegal(VT.getScalarType())) {
17294         Val =
17295             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17296         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17297       }
17298       if (VT.getScalarType() == InVecT.getScalarType() &&
17299           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17300           TLI.isShuffleMaskLegal(NewMask, VT)) {
17301         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17302                                    DAG.getUNDEF(InVecT), NewMask);
17303         // If the initial vector is the correct size this shuffle is a
17304         // valid result.
17305         if (VT == InVecT)
17306           return Val;
17307         // If not we must truncate the vector.
17308         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17309           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17310           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17311           EVT SubVT =
17312               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17313                                VT.getVectorNumElements());
17314           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17315                             ZeroIdx);
17316           return Val;
17317         }
17318       }
17319     }
17320   }
17321
17322   return SDValue();
17323 }
17324
17325 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17326   EVT VT = N->getValueType(0);
17327   SDValue N0 = N->getOperand(0);
17328   SDValue N1 = N->getOperand(1);
17329   SDValue N2 = N->getOperand(2);
17330
17331   // If inserting an UNDEF, just return the original vector.
17332   if (N1.isUndef())
17333     return N0;
17334
17335   // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
17336   // us to pull BITCASTs from input to output.
17337   if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
17338     if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
17339       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
17340
17341   // If this is an insert of an extracted vector into an undef vector, we can
17342   // just use the input to the extract.
17343   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17344       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17345     return N1.getOperand(0);
17346
17347   // If we are inserting a bitcast value into an undef, with the same
17348   // number of elements, just use the bitcast input of the extract.
17349   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17350   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17351   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17352       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17353       N1.getOperand(0).getOperand(1) == N2 &&
17354       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17355           VT.getVectorNumElements() &&
17356       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17357           VT.getSizeInBits()) {
17358     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17359   }
17360
17361   // If both N1 and N2 are bitcast values on which insert_subvector
17362   // would makes sense, pull the bitcast through.
17363   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
17364   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17365   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17366     SDValue CN0 = N0.getOperand(0);
17367     SDValue CN1 = N1.getOperand(0);
17368     EVT CN0VT = CN0.getValueType();
17369     EVT CN1VT = CN1.getValueType();
17370     if (CN0VT.isVector() && CN1VT.isVector() &&
17371         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17372         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17373       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17374                                       CN0.getValueType(), CN0, CN1, N2);
17375       return DAG.getBitcast(VT, NewINSERT);
17376     }
17377   }
17378
17379   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17380   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17381   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17382   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17383       N0.getOperand(1).getValueType() == N1.getValueType() &&
17384       N0.getOperand(2) == N2)
17385     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
17386                        N1, N2);
17387
17388   if (!isa<ConstantSDNode>(N2))
17389     return SDValue();
17390
17391   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
17392
17393   // Canonicalize insert_subvector dag nodes.
17394   // Example:
17395   // (insert_subvector (insert_subvector A, Idx0), Idx1)
17396   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
17397   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
17398       N1.getValueType() == N0.getOperand(1).getValueType() &&
17399       isa<ConstantSDNode>(N0.getOperand(2))) {
17400     unsigned OtherIdx = N0.getConstantOperandVal(2);
17401     if (InsIdx < OtherIdx) {
17402       // Swap nodes.
17403       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
17404                                   N0.getOperand(0), N1, N2);
17405       AddToWorklist(NewOp.getNode());
17406       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
17407                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
17408     }
17409   }
17410
17411   // If the input vector is a concatenation, and the insert replaces
17412   // one of the pieces, we can optimize into a single concat_vectors.
17413   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
17414       N0.getOperand(0).getValueType() == N1.getValueType()) {
17415     unsigned Factor = N1.getValueType().getVectorNumElements();
17416
17417     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
17418     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
17419
17420     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17421   }
17422
17423   return SDValue();
17424 }
17425
17426 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17427   SDValue N0 = N->getOperand(0);
17428
17429   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17430   if (N0->getOpcode() == ISD::FP16_TO_FP)
17431     return N0->getOperand(0);
17432
17433   return SDValue();
17434 }
17435
17436 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17437   SDValue N0 = N->getOperand(0);
17438
17439   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17440   if (N0->getOpcode() == ISD::AND) {
17441     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17442     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17443       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17444                          N0.getOperand(0));
17445     }
17446   }
17447
17448   return SDValue();
17449 }
17450
17451 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
17452 /// with the destination vector and a zero vector.
17453 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
17454 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
17455 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
17456   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
17457
17458   EVT VT = N->getValueType(0);
17459   SDValue LHS = N->getOperand(0);
17460   SDValue RHS = peekThroughBitcast(N->getOperand(1));
17461   SDLoc DL(N);
17462
17463   // Make sure we're not running after operation legalization where it
17464   // may have custom lowered the vector shuffles.
17465   if (LegalOperations)
17466     return SDValue();
17467
17468   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
17469     return SDValue();
17470
17471   EVT RVT = RHS.getValueType();
17472   unsigned NumElts = RHS.getNumOperands();
17473
17474   // Attempt to create a valid clear mask, splitting the mask into
17475   // sub elements and checking to see if each is
17476   // all zeros or all ones - suitable for shuffle masking.
17477   auto BuildClearMask = [&](int Split) {
17478     int NumSubElts = NumElts * Split;
17479     int NumSubBits = RVT.getScalarSizeInBits() / Split;
17480
17481     SmallVector<int, 8> Indices;
17482     for (int i = 0; i != NumSubElts; ++i) {
17483       int EltIdx = i / Split;
17484       int SubIdx = i % Split;
17485       SDValue Elt = RHS.getOperand(EltIdx);
17486       if (Elt.isUndef()) {
17487         Indices.push_back(-1);
17488         continue;
17489       }
17490
17491       APInt Bits;
17492       if (isa<ConstantSDNode>(Elt))
17493         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
17494       else if (isa<ConstantFPSDNode>(Elt))
17495         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
17496       else
17497         return SDValue();
17498
17499       // Extract the sub element from the constant bit mask.
17500       if (DAG.getDataLayout().isBigEndian()) {
17501         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
17502       } else {
17503         Bits.lshrInPlace(SubIdx * NumSubBits);
17504       }
17505
17506       if (Split > 1)
17507         Bits = Bits.trunc(NumSubBits);
17508
17509       if (Bits.isAllOnesValue())
17510         Indices.push_back(i);
17511       else if (Bits == 0)
17512         Indices.push_back(i + NumSubElts);
17513       else
17514         return SDValue();
17515     }
17516
17517     // Let's see if the target supports this vector_shuffle.
17518     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
17519     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
17520     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
17521       return SDValue();
17522
17523     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
17524     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
17525                                                    DAG.getBitcast(ClearVT, LHS),
17526                                                    Zero, Indices));
17527   };
17528
17529   // Determine maximum split level (byte level masking).
17530   int MaxSplit = 1;
17531   if (RVT.getScalarSizeInBits() % 8 == 0)
17532     MaxSplit = RVT.getScalarSizeInBits() / 8;
17533
17534   for (int Split = 1; Split <= MaxSplit; ++Split)
17535     if (RVT.getScalarSizeInBits() % Split == 0)
17536       if (SDValue S = BuildClearMask(Split))
17537         return S;
17538
17539   return SDValue();
17540 }
17541
17542 /// Visit a binary vector operation, like ADD.
17543 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17544   assert(N->getValueType(0).isVector() &&
17545          "SimplifyVBinOp only works on vectors!");
17546
17547   SDValue LHS = N->getOperand(0);
17548   SDValue RHS = N->getOperand(1);
17549   SDValue Ops[] = {LHS, RHS};
17550
17551   // See if we can constant fold the vector operation.
17552   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17553           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17554     return Fold;
17555
17556   // Type legalization might introduce new shuffles in the DAG.
17557   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17558   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
17559   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17560       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17561       LHS.getOperand(1).isUndef() &&
17562       RHS.getOperand(1).isUndef()) {
17563     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17564     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17565
17566     if (SVN0->getMask().equals(SVN1->getMask())) {
17567       EVT VT = N->getValueType(0);
17568       SDValue UndefVector = LHS.getOperand(1);
17569       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17570                                      LHS.getOperand(0), RHS.getOperand(0),
17571                                      N->getFlags());
17572       AddUsersToWorklist(N);
17573       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17574                                   SVN0->getMask());
17575     }
17576   }
17577
17578   return SDValue();
17579 }
17580
17581 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17582                                     SDValue N2) {
17583   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17584
17585   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17586                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
17587
17588   // If we got a simplified select_cc node back from SimplifySelectCC, then
17589   // break it down into a new SETCC node, and a new SELECT node, and then return
17590   // the SELECT node, since we were called with a SELECT node.
17591   if (SCC.getNode()) {
17592     // Check to see if we got a select_cc back (to turn into setcc/select).
17593     // Otherwise, just return whatever node we got back, like fabs.
17594     if (SCC.getOpcode() == ISD::SELECT_CC) {
17595       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17596                                   N0.getValueType(),
17597                                   SCC.getOperand(0), SCC.getOperand(1),
17598                                   SCC.getOperand(4));
17599       AddToWorklist(SETCC.getNode());
17600       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17601                            SCC.getOperand(2), SCC.getOperand(3));
17602     }
17603
17604     return SCC;
17605   }
17606   return SDValue();
17607 }
17608
17609 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
17610 /// being selected between, see if we can simplify the select.  Callers of this
17611 /// should assume that TheSelect is deleted if this returns true.  As such, they
17612 /// should return the appropriate thing (e.g. the node) back to the top-level of
17613 /// the DAG combiner loop to avoid it being looked at.
17614 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
17615                                     SDValue RHS) {
17616   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17617   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
17618   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
17619     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
17620       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
17621       SDValue Sqrt = RHS;
17622       ISD::CondCode CC;
17623       SDValue CmpLHS;
17624       const ConstantFPSDNode *Zero = nullptr;
17625
17626       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
17627         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
17628         CmpLHS = TheSelect->getOperand(0);
17629         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
17630       } else {
17631         // SELECT or VSELECT
17632         SDValue Cmp = TheSelect->getOperand(0);
17633         if (Cmp.getOpcode() == ISD::SETCC) {
17634           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
17635           CmpLHS = Cmp.getOperand(0);
17636           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
17637         }
17638       }
17639       if (Zero && Zero->isZero() &&
17640           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
17641           CC == ISD::SETULT || CC == ISD::SETLT)) {
17642         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17643         CombineTo(TheSelect, Sqrt);
17644         return true;
17645       }
17646     }
17647   }
17648   // Cannot simplify select with vector condition
17649   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
17650
17651   // If this is a select from two identical things, try to pull the operation
17652   // through the select.
17653   if (LHS.getOpcode() != RHS.getOpcode() ||
17654       !LHS.hasOneUse() || !RHS.hasOneUse())
17655     return false;
17656
17657   // If this is a load and the token chain is identical, replace the select
17658   // of two loads with a load through a select of the address to load from.
17659   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
17660   // constants have been dropped into the constant pool.
17661   if (LHS.getOpcode() == ISD::LOAD) {
17662     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
17663     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
17664
17665     // Token chains must be identical.
17666     if (LHS.getOperand(0) != RHS.getOperand(0) ||
17667         // Do not let this transformation reduce the number of volatile loads.
17668         LLD->isVolatile() || RLD->isVolatile() ||
17669         // FIXME: If either is a pre/post inc/dec load,
17670         // we'd need to split out the address adjustment.
17671         LLD->isIndexed() || RLD->isIndexed() ||
17672         // If this is an EXTLOAD, the VT's must match.
17673         LLD->getMemoryVT() != RLD->getMemoryVT() ||
17674         // If this is an EXTLOAD, the kind of extension must match.
17675         (LLD->getExtensionType() != RLD->getExtensionType() &&
17676          // The only exception is if one of the extensions is anyext.
17677          LLD->getExtensionType() != ISD::EXTLOAD &&
17678          RLD->getExtensionType() != ISD::EXTLOAD) ||
17679         // FIXME: this discards src value information.  This is
17680         // over-conservative. It would be beneficial to be able to remember
17681         // both potential memory locations.  Since we are discarding
17682         // src value info, don't do the transformation if the memory
17683         // locations are not in the default address space.
17684         LLD->getPointerInfo().getAddrSpace() != 0 ||
17685         RLD->getPointerInfo().getAddrSpace() != 0 ||
17686         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
17687                                       LLD->getBasePtr().getValueType()))
17688       return false;
17689
17690     // Check that the select condition doesn't reach either load.  If so,
17691     // folding this will induce a cycle into the DAG.  If not, this is safe to
17692     // xform, so create a select of the addresses.
17693     SDValue Addr;
17694     if (TheSelect->getOpcode() == ISD::SELECT) {
17695       SDNode *CondNode = TheSelect->getOperand(0).getNode();
17696       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
17697           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
17698         return false;
17699       // The loads must not depend on one another.
17700       if (LLD->isPredecessorOf(RLD) ||
17701           RLD->isPredecessorOf(LLD))
17702         return false;
17703       Addr = DAG.getSelect(SDLoc(TheSelect),
17704                            LLD->getBasePtr().getValueType(),
17705                            TheSelect->getOperand(0), LLD->getBasePtr(),
17706                            RLD->getBasePtr());
17707     } else {  // Otherwise SELECT_CC
17708       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
17709       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
17710
17711       if ((LLD->hasAnyUseOfValue(1) &&
17712            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
17713           (RLD->hasAnyUseOfValue(1) &&
17714            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
17715         return false;
17716
17717       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
17718                          LLD->getBasePtr().getValueType(),
17719                          TheSelect->getOperand(0),
17720                          TheSelect->getOperand(1),
17721                          LLD->getBasePtr(), RLD->getBasePtr(),
17722                          TheSelect->getOperand(4));
17723     }
17724
17725     SDValue Load;
17726     // It is safe to replace the two loads if they have different alignments,
17727     // but the new load must be the minimum (most restrictive) alignment of the
17728     // inputs.
17729     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
17730     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
17731     if (!RLD->isInvariant())
17732       MMOFlags &= ~MachineMemOperand::MOInvariant;
17733     if (!RLD->isDereferenceable())
17734       MMOFlags &= ~MachineMemOperand::MODereferenceable;
17735     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
17736       // FIXME: Discards pointer and AA info.
17737       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
17738                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
17739                          MMOFlags);
17740     } else {
17741       // FIXME: Discards pointer and AA info.
17742       Load = DAG.getExtLoad(
17743           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
17744                                                   : LLD->getExtensionType(),
17745           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
17746           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
17747     }
17748
17749     // Users of the select now use the result of the load.
17750     CombineTo(TheSelect, Load);
17751
17752     // Users of the old loads now use the new load's chain.  We know the
17753     // old-load value is dead now.
17754     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
17755     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
17756     return true;
17757   }
17758
17759   return false;
17760 }
17761
17762 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
17763 /// bitwise 'and'.
17764 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
17765                                             SDValue N1, SDValue N2, SDValue N3,
17766                                             ISD::CondCode CC) {
17767   // If this is a select where the false operand is zero and the compare is a
17768   // check of the sign bit, see if we can perform the "gzip trick":
17769   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
17770   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
17771   EVT XType = N0.getValueType();
17772   EVT AType = N2.getValueType();
17773   if (!isNullConstant(N3) || !XType.bitsGE(AType))
17774     return SDValue();
17775
17776   // If the comparison is testing for a positive value, we have to invert
17777   // the sign bit mask, so only do that transform if the target has a bitwise
17778   // 'and not' instruction (the invert is free).
17779   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
17780     // (X > -1) ? A : 0
17781     // (X >  0) ? X : 0 <-- This is canonical signed max.
17782     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
17783       return SDValue();
17784   } else if (CC == ISD::SETLT) {
17785     // (X <  0) ? A : 0
17786     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
17787     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
17788       return SDValue();
17789   } else {
17790     return SDValue();
17791   }
17792
17793   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
17794   // constant.
17795   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
17796   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17797   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
17798     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
17799     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
17800     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
17801     AddToWorklist(Shift.getNode());
17802
17803     if (XType.bitsGT(AType)) {
17804       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17805       AddToWorklist(Shift.getNode());
17806     }
17807
17808     if (CC == ISD::SETGT)
17809       Shift = DAG.getNOT(DL, Shift, AType);
17810
17811     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17812   }
17813
17814   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
17815   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
17816   AddToWorklist(Shift.getNode());
17817
17818   if (XType.bitsGT(AType)) {
17819     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17820     AddToWorklist(Shift.getNode());
17821   }
17822
17823   if (CC == ISD::SETGT)
17824     Shift = DAG.getNOT(DL, Shift, AType);
17825
17826   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17827 }
17828
17829 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
17830 /// where 'cond' is the comparison specified by CC.
17831 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
17832                                       SDValue N2, SDValue N3, ISD::CondCode CC,
17833                                       bool NotExtCompare) {
17834   // (x ? y : y) -> y.
17835   if (N2 == N3) return N2;
17836
17837   EVT VT = N2.getValueType();
17838   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
17839   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17840
17841   // Determine if the condition we're dealing with is constant
17842   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
17843                               N0, N1, CC, DL, false);
17844   if (SCC.getNode()) AddToWorklist(SCC.getNode());
17845
17846   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
17847     // fold select_cc true, x, y -> x
17848     // fold select_cc false, x, y -> y
17849     return !SCCC->isNullValue() ? N2 : N3;
17850   }
17851
17852   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
17853   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
17854   // in it.  This is a win when the constant is not otherwise available because
17855   // it replaces two constant pool loads with one.  We only do this if the FP
17856   // type is known to be legal, because if it isn't, then we are before legalize
17857   // types an we want the other legalization to happen first (e.g. to avoid
17858   // messing with soft float) and if the ConstantFP is not legal, because if
17859   // it is legal, we may not need to store the FP constant in a constant pool.
17860   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
17861     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
17862       if (TLI.isTypeLegal(N2.getValueType()) &&
17863           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
17864                TargetLowering::Legal &&
17865            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
17866            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
17867           // If both constants have multiple uses, then we won't need to do an
17868           // extra load, they are likely around in registers for other users.
17869           (TV->hasOneUse() || FV->hasOneUse())) {
17870         Constant *Elts[] = {
17871           const_cast<ConstantFP*>(FV->getConstantFPValue()),
17872           const_cast<ConstantFP*>(TV->getConstantFPValue())
17873         };
17874         Type *FPTy = Elts[0]->getType();
17875         const DataLayout &TD = DAG.getDataLayout();
17876
17877         // Create a ConstantArray of the two constants.
17878         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
17879         SDValue CPIdx =
17880             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
17881                                 TD.getPrefTypeAlignment(FPTy));
17882         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
17883
17884         // Get the offsets to the 0 and 1 element of the array so that we can
17885         // select between them.
17886         SDValue Zero = DAG.getIntPtrConstant(0, DL);
17887         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
17888         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
17889
17890         SDValue Cond = DAG.getSetCC(DL,
17891                                     getSetCCResultType(N0.getValueType()),
17892                                     N0, N1, CC);
17893         AddToWorklist(Cond.getNode());
17894         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
17895                                           Cond, One, Zero);
17896         AddToWorklist(CstOffset.getNode());
17897         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
17898                             CstOffset);
17899         AddToWorklist(CPIdx.getNode());
17900         return DAG.getLoad(
17901             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
17902             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
17903             Alignment);
17904       }
17905     }
17906
17907   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
17908     return V;
17909
17910   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
17911   // where y is has a single bit set.
17912   // A plaintext description would be, we can turn the SELECT_CC into an AND
17913   // when the condition can be materialized as an all-ones register.  Any
17914   // single bit-test can be materialized as an all-ones register with
17915   // shift-left and shift-right-arith.
17916   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
17917       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
17918     SDValue AndLHS = N0->getOperand(0);
17919     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17920     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
17921       // Shift the tested bit over the sign bit.
17922       const APInt &AndMask = ConstAndRHS->getAPIntValue();
17923       SDValue ShlAmt =
17924         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
17925                         getShiftAmountTy(AndLHS.getValueType()));
17926       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
17927
17928       // Now arithmetic right shift it all the way over, so the result is either
17929       // all-ones, or zero.
17930       SDValue ShrAmt =
17931         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
17932                         getShiftAmountTy(Shl.getValueType()));
17933       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
17934
17935       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
17936     }
17937   }
17938
17939   // fold select C, 16, 0 -> shl C, 4
17940   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
17941       TLI.getBooleanContents(N0.getValueType()) ==
17942           TargetLowering::ZeroOrOneBooleanContent) {
17943
17944     // If the caller doesn't want us to simplify this into a zext of a compare,
17945     // don't do it.
17946     if (NotExtCompare && N2C->isOne())
17947       return SDValue();
17948
17949     // Get a SetCC of the condition
17950     // NOTE: Don't create a SETCC if it's not legal on this target.
17951     if (!LegalOperations ||
17952         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
17953       SDValue Temp, SCC;
17954       // cast from setcc result type to select result type
17955       if (LegalTypes) {
17956         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
17957                             N0, N1, CC);
17958         if (N2.getValueType().bitsLT(SCC.getValueType()))
17959           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
17960                                         N2.getValueType());
17961         else
17962           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17963                              N2.getValueType(), SCC);
17964       } else {
17965         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
17966         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17967                            N2.getValueType(), SCC);
17968       }
17969
17970       AddToWorklist(SCC.getNode());
17971       AddToWorklist(Temp.getNode());
17972
17973       if (N2C->isOne())
17974         return Temp;
17975
17976       // shl setcc result by log2 n2c
17977       return DAG.getNode(
17978           ISD::SHL, DL, N2.getValueType(), Temp,
17979           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
17980                           getShiftAmountTy(Temp.getValueType())));
17981     }
17982   }
17983
17984   // Check to see if this is an integer abs.
17985   // select_cc setg[te] X,  0,  X, -X ->
17986   // select_cc setgt    X, -1,  X, -X ->
17987   // select_cc setl[te] X,  0, -X,  X ->
17988   // select_cc setlt    X,  1, -X,  X ->
17989   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
17990   if (N1C) {
17991     ConstantSDNode *SubC = nullptr;
17992     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
17993          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
17994         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
17995       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
17996     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
17997               (N1C->isOne() && CC == ISD::SETLT)) &&
17998              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
17999       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18000
18001     EVT XType = N0.getValueType();
18002     if (SubC && SubC->isNullValue() && XType.isInteger()) {
18003       SDLoc DL(N0);
18004       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
18005                                   N0,
18006                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
18007                                          getShiftAmountTy(N0.getValueType())));
18008       SDValue Add = DAG.getNode(ISD::ADD, DL,
18009                                 XType, N0, Shift);
18010       AddToWorklist(Shift.getNode());
18011       AddToWorklist(Add.getNode());
18012       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
18013     }
18014   }
18015
18016   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18017   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18018   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18019   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18020   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18021   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18022   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18023   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18024   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18025     SDValue ValueOnZero = N2;
18026     SDValue Count = N3;
18027     // If the condition is NE instead of E, swap the operands.
18028     if (CC == ISD::SETNE)
18029       std::swap(ValueOnZero, Count);
18030     // Check if the value on zero is a constant equal to the bits in the type.
18031     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18032       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18033         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18034         // legal, combine to just cttz.
18035         if ((Count.getOpcode() == ISD::CTTZ ||
18036              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18037             N0 == Count.getOperand(0) &&
18038             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18039           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18040         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18041         // legal, combine to just ctlz.
18042         if ((Count.getOpcode() == ISD::CTLZ ||
18043              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18044             N0 == Count.getOperand(0) &&
18045             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18046           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18047       }
18048     }
18049   }
18050
18051   return SDValue();
18052 }
18053
18054 /// This is a stub for TargetLowering::SimplifySetCC.
18055 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18056                                    ISD::CondCode Cond, const SDLoc &DL,
18057                                    bool foldBooleans) {
18058   TargetLowering::DAGCombinerInfo
18059     DagCombineInfo(DAG, Level, false, this);
18060   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18061 }
18062
18063 /// Given an ISD::SDIV node expressing a divide by constant, return
18064 /// a DAG expression to select that will generate the same value by multiplying
18065 /// by a magic number.
18066 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18067 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18068   // when optimising for minimum size, we don't want to expand a div to a mul
18069   // and a shift.
18070   if (DAG.getMachineFunction().getFunction().optForMinSize())
18071     return SDValue();
18072
18073   SmallVector<SDNode *, 8> Built;
18074   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18075     for (SDNode *N : Built)
18076       AddToWorklist(N);
18077     return S;
18078   }
18079
18080   return SDValue();
18081 }
18082
18083 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18084 /// DAG expression that will generate the same value by right shifting.
18085 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18086   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18087   if (!C)
18088     return SDValue();
18089
18090   // Avoid division by zero.
18091   if (C->isNullValue())
18092     return SDValue();
18093
18094   SmallVector<SDNode *, 8> Built;
18095   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
18096     for (SDNode *N : Built)
18097       AddToWorklist(N);
18098     return S;
18099   }
18100
18101   return SDValue();
18102 }
18103
18104 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18105 /// expression that will generate the same value by multiplying by a magic
18106 /// number.
18107 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18108 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18109   // when optimising for minimum size, we don't want to expand a div to a mul
18110   // and a shift.
18111   if (DAG.getMachineFunction().getFunction().optForMinSize())
18112     return SDValue();
18113
18114   SmallVector<SDNode *, 8> Built;
18115   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
18116     for (SDNode *N : Built)
18117       AddToWorklist(N);
18118     return S;
18119   }
18120
18121   return SDValue();
18122 }
18123
18124 /// Determines the LogBase2 value for a non-null input value using the
18125 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18126 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18127   EVT VT = V.getValueType();
18128   unsigned EltBits = VT.getScalarSizeInBits();
18129   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18130   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18131   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18132   return LogBase2;
18133 }
18134
18135 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18136 /// For the reciprocal, we need to find the zero of the function:
18137 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
18138 ///     =>
18139 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18140 ///     does not require additional intermediate precision]
18141 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18142   if (Level >= AfterLegalizeDAG)
18143     return SDValue();
18144
18145   // TODO: Handle half and/or extended types?
18146   EVT VT = Op.getValueType();
18147   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18148     return SDValue();
18149
18150   // If estimates are explicitly disabled for this function, we're done.
18151   MachineFunction &MF = DAG.getMachineFunction();
18152   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18153   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18154     return SDValue();
18155
18156   // Estimates may be explicitly enabled for this type with a custom number of
18157   // refinement steps.
18158   int Iterations = TLI.getDivRefinementSteps(VT, MF);
18159   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18160     AddToWorklist(Est.getNode());
18161
18162     if (Iterations) {
18163       EVT VT = Op.getValueType();
18164       SDLoc DL(Op);
18165       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18166
18167       // Newton iterations: Est = Est + Est (1 - Arg * Est)
18168       for (int i = 0; i < Iterations; ++i) {
18169         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18170         AddToWorklist(NewEst.getNode());
18171
18172         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18173         AddToWorklist(NewEst.getNode());
18174
18175         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18176         AddToWorklist(NewEst.getNode());
18177
18178         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18179         AddToWorklist(Est.getNode());
18180       }
18181     }
18182     return Est;
18183   }
18184
18185   return SDValue();
18186 }
18187
18188 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18189 /// For the reciprocal sqrt, we need to find the zero of the function:
18190 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18191 ///     =>
18192 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18193 /// As a result, we precompute A/2 prior to the iteration loop.
18194 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18195                                          unsigned Iterations,
18196                                          SDNodeFlags Flags, bool Reciprocal) {
18197   EVT VT = Arg.getValueType();
18198   SDLoc DL(Arg);
18199   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18200
18201   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18202   // this entire sequence requires only one FP constant.
18203   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18204   AddToWorklist(HalfArg.getNode());
18205
18206   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18207   AddToWorklist(HalfArg.getNode());
18208
18209   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18210   for (unsigned i = 0; i < Iterations; ++i) {
18211     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18212     AddToWorklist(NewEst.getNode());
18213
18214     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18215     AddToWorklist(NewEst.getNode());
18216
18217     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18218     AddToWorklist(NewEst.getNode());
18219
18220     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18221     AddToWorklist(Est.getNode());
18222   }
18223
18224   // If non-reciprocal square root is requested, multiply the result by Arg.
18225   if (!Reciprocal) {
18226     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18227     AddToWorklist(Est.getNode());
18228   }
18229
18230   return Est;
18231 }
18232
18233 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18234 /// For the reciprocal sqrt, we need to find the zero of the function:
18235 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18236 ///     =>
18237 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
18238 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18239                                          unsigned Iterations,
18240                                          SDNodeFlags Flags, bool Reciprocal) {
18241   EVT VT = Arg.getValueType();
18242   SDLoc DL(Arg);
18243   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18244   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18245
18246   // This routine must enter the loop below to work correctly
18247   // when (Reciprocal == false).
18248   assert(Iterations > 0);
18249
18250   // Newton iterations for reciprocal square root:
18251   // E = (E * -0.5) * ((A * E) * E + -3.0)
18252   for (unsigned i = 0; i < Iterations; ++i) {
18253     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18254     AddToWorklist(AE.getNode());
18255
18256     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18257     AddToWorklist(AEE.getNode());
18258
18259     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18260     AddToWorklist(RHS.getNode());
18261
18262     // When calculating a square root at the last iteration build:
18263     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18264     // (notice a common subexpression)
18265     SDValue LHS;
18266     if (Reciprocal || (i + 1) < Iterations) {
18267       // RSQRT: LHS = (E * -0.5)
18268       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18269     } else {
18270       // SQRT: LHS = (A * E) * -0.5
18271       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18272     }
18273     AddToWorklist(LHS.getNode());
18274
18275     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18276     AddToWorklist(Est.getNode());
18277   }
18278
18279   return Est;
18280 }
18281
18282 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18283 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18284 /// Op can be zero.
18285 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18286                                            bool Reciprocal) {
18287   if (Level >= AfterLegalizeDAG)
18288     return SDValue();
18289
18290   // TODO: Handle half and/or extended types?
18291   EVT VT = Op.getValueType();
18292   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18293     return SDValue();
18294
18295   // If estimates are explicitly disabled for this function, we're done.
18296   MachineFunction &MF = DAG.getMachineFunction();
18297   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18298   if (Enabled == TLI.ReciprocalEstimate::Disabled)
18299     return SDValue();
18300
18301   // Estimates may be explicitly enabled for this type with a custom number of
18302   // refinement steps.
18303   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18304
18305   bool UseOneConstNR = false;
18306   if (SDValue Est =
18307       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18308                           Reciprocal)) {
18309     AddToWorklist(Est.getNode());
18310
18311     if (Iterations) {
18312       Est = UseOneConstNR
18313             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18314             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18315
18316       if (!Reciprocal) {
18317         // The estimate is now completely wrong if the input was exactly 0.0 or
18318         // possibly a denormal. Force the answer to 0.0 for those cases.
18319         EVT VT = Op.getValueType();
18320         SDLoc DL(Op);
18321         EVT CCVT = getSetCCResultType(VT);
18322         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18323         const Function &F = DAG.getMachineFunction().getFunction();
18324         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18325         if (Denorms.getValueAsString().equals("ieee")) {
18326           // fabs(X) < SmallestNormal ? 0.0 : Est
18327           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18328           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18329           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18330           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18331           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18332           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
18333           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
18334           AddToWorklist(Fabs.getNode());
18335           AddToWorklist(IsDenorm.getNode());
18336           AddToWorklist(Est.getNode());
18337         } else {
18338           // X == 0.0 ? 0.0 : Est
18339           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18340           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
18341           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
18342           AddToWorklist(IsZero.getNode());
18343           AddToWorklist(Est.getNode());
18344         }
18345       }
18346     }
18347     return Est;
18348   }
18349
18350   return SDValue();
18351 }
18352
18353 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18354   return buildSqrtEstimateImpl(Op, Flags, true);
18355 }
18356
18357 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18358   return buildSqrtEstimateImpl(Op, Flags, false);
18359 }
18360
18361 /// Return true if there is any possibility that the two addresses overlap.
18362 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
18363   // If they are the same then they must be aliases.
18364   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
18365
18366   // If they are both volatile then they cannot be reordered.
18367   if (Op0->isVolatile() && Op1->isVolatile()) return true;
18368
18369   // If one operation reads from invariant memory, and the other may store, they
18370   // cannot alias. These should really be checking the equivalent of mayWrite,
18371   // but it only matters for memory nodes other than load /store.
18372   if (Op0->isInvariant() && Op1->writeMem())
18373     return false;
18374
18375   if (Op1->isInvariant() && Op0->writeMem())
18376     return false;
18377
18378   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
18379   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
18380
18381   // Check for BaseIndexOffset matching.
18382   BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
18383   BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
18384   int64_t PtrDiff;
18385   if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
18386     if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
18387       return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
18388
18389     // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
18390     // able to calculate their relative offset if at least one arises
18391     // from an alloca. However, these allocas cannot overlap and we
18392     // can infer there is no alias.
18393     if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
18394       if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
18395         MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18396         // If the base are the same frame index but the we couldn't find a
18397         // constant offset, (indices are different) be conservative.
18398         if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
18399                        !MFI.isFixedObjectIndex(B->getIndex())))
18400           return false;
18401       }
18402
18403     bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
18404     bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
18405     bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
18406     bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
18407     bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
18408     bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
18409
18410     // If of mismatched base types or checkable indices we can check
18411     // they do not alias.
18412     if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
18413          (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
18414         (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
18415       return false;
18416   }
18417
18418   // If we know required SrcValue1 and SrcValue2 have relatively large
18419   // alignment compared to the size and offset of the access, we may be able
18420   // to prove they do not alias. This check is conservative for now to catch
18421   // cases created by splitting vector types.
18422   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
18423   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
18424   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
18425   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
18426   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
18427       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
18428     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
18429     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
18430
18431     // There is no overlap between these relatively aligned accesses of
18432     // similar size. Return no alias.
18433     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
18434         (OffAlign1 + NumBytes1) <= OffAlign0)
18435       return false;
18436   }
18437
18438   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
18439                    ? CombinerGlobalAA
18440                    : DAG.getSubtarget().useAA();
18441 #ifndef NDEBUG
18442   if (CombinerAAOnlyFunc.getNumOccurrences() &&
18443       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
18444     UseAA = false;
18445 #endif
18446
18447   if (UseAA && AA &&
18448       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
18449     // Use alias analysis information.
18450     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
18451     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
18452     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
18453     AliasResult AAResult =
18454         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
18455                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
18456                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
18457                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
18458     if (AAResult == NoAlias)
18459       return false;
18460   }
18461
18462   // Otherwise we have to assume they alias.
18463   return true;
18464 }
18465
18466 /// Walk up chain skipping non-aliasing memory nodes,
18467 /// looking for aliasing nodes and adding them to the Aliases vector.
18468 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
18469                                    SmallVectorImpl<SDValue> &Aliases) {
18470   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
18471   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
18472
18473   // Get alias information for node.
18474   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
18475
18476   // Starting off.
18477   Chains.push_back(OriginalChain);
18478   unsigned Depth = 0;
18479
18480   // Look at each chain and determine if it is an alias.  If so, add it to the
18481   // aliases list.  If not, then continue up the chain looking for the next
18482   // candidate.
18483   while (!Chains.empty()) {
18484     SDValue Chain = Chains.pop_back_val();
18485
18486     // For TokenFactor nodes, look at each operand and only continue up the
18487     // chain until we reach the depth limit.
18488     //
18489     // FIXME: The depth check could be made to return the last non-aliasing
18490     // chain we found before we hit a tokenfactor rather than the original
18491     // chain.
18492     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
18493       Aliases.clear();
18494       Aliases.push_back(OriginalChain);
18495       return;
18496     }
18497
18498     // Don't bother if we've been before.
18499     if (!Visited.insert(Chain.getNode()).second)
18500       continue;
18501
18502     switch (Chain.getOpcode()) {
18503     case ISD::EntryToken:
18504       // Entry token is ideal chain operand, but handled in FindBetterChain.
18505       break;
18506
18507     case ISD::LOAD:
18508     case ISD::STORE: {
18509       // Get alias information for Chain.
18510       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
18511           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
18512
18513       // If chain is alias then stop here.
18514       if (!(IsLoad && IsOpLoad) &&
18515           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
18516         Aliases.push_back(Chain);
18517       } else {
18518         // Look further up the chain.
18519         Chains.push_back(Chain.getOperand(0));
18520         ++Depth;
18521       }
18522       break;
18523     }
18524
18525     case ISD::TokenFactor:
18526       // We have to check each of the operands of the token factor for "small"
18527       // token factors, so we queue them up.  Adding the operands to the queue
18528       // (stack) in reverse order maintains the original order and increases the
18529       // likelihood that getNode will find a matching token factor (CSE.)
18530       if (Chain.getNumOperands() > 16) {
18531         Aliases.push_back(Chain);
18532         break;
18533       }
18534       for (unsigned n = Chain.getNumOperands(); n;)
18535         Chains.push_back(Chain.getOperand(--n));
18536       ++Depth;
18537       break;
18538
18539     case ISD::CopyFromReg:
18540       // Forward past CopyFromReg.
18541       Chains.push_back(Chain.getOperand(0));
18542       ++Depth;
18543       break;
18544
18545     default:
18546       // For all other instructions we will just have to take what we can get.
18547       Aliases.push_back(Chain);
18548       break;
18549     }
18550   }
18551 }
18552
18553 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
18554 /// (aliasing node.)
18555 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18556   if (OptLevel == CodeGenOpt::None)
18557     return OldChain;
18558
18559   // Ops for replacing token factor.
18560   SmallVector<SDValue, 8> Aliases;
18561
18562   // Accumulate all the aliases to this node.
18563   GatherAllAliases(N, OldChain, Aliases);
18564
18565   // If no operands then chain to entry token.
18566   if (Aliases.size() == 0)
18567     return DAG.getEntryNode();
18568
18569   // If a single operand then chain to it.  We don't need to revisit it.
18570   if (Aliases.size() == 1)
18571     return Aliases[0];
18572
18573   // Construct a custom tailored token factor.
18574   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18575 }
18576
18577 // This function tries to collect a bunch of potentially interesting
18578 // nodes to improve the chains of, all at once. This might seem
18579 // redundant, as this function gets called when visiting every store
18580 // node, so why not let the work be done on each store as it's visited?
18581 //
18582 // I believe this is mainly important because MergeConsecutiveStores
18583 // is unable to deal with merging stores of different sizes, so unless
18584 // we improve the chains of all the potential candidates up-front
18585 // before running MergeConsecutiveStores, it might only see some of
18586 // the nodes that will eventually be candidates, and then not be able
18587 // to go from a partially-merged state to the desired final
18588 // fully-merged state.
18589 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
18590   if (OptLevel == CodeGenOpt::None)
18591     return false;
18592
18593   // This holds the base pointer, index, and the offset in bytes from the base
18594   // pointer.
18595   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
18596
18597   // We must have a base and an offset.
18598   if (!BasePtr.getBase().getNode())
18599     return false;
18600
18601   // Do not handle stores to undef base pointers.
18602   if (BasePtr.getBase().isUndef())
18603     return false;
18604
18605   SmallVector<StoreSDNode *, 8> ChainedStores;
18606   ChainedStores.push_back(St);
18607
18608   // Walk up the chain and look for nodes with offsets from the same
18609   // base pointer. Stop when reaching an instruction with a different kind
18610   // or instruction which has a different base pointer.
18611   StoreSDNode *Index = St;
18612   while (Index) {
18613     // If the chain has more than one use, then we can't reorder the mem ops.
18614     if (Index != St && !SDValue(Index, 0)->hasOneUse())
18615       break;
18616
18617     if (Index->isVolatile() || Index->isIndexed())
18618       break;
18619
18620     // Find the base pointer and offset for this memory node.
18621     BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
18622
18623     // Check that the base pointer is the same as the original one.
18624     if (!BasePtr.equalBaseIndex(Ptr, DAG))
18625       break;
18626
18627     // Walk up the chain to find the next store node, ignoring any
18628     // intermediate loads. Any other kind of node will halt the loop.
18629     SDNode *NextInChain = Index->getChain().getNode();
18630     while (true) {
18631       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
18632         // We found a store node. Use it for the next iteration.
18633         if (STn->isVolatile() || STn->isIndexed()) {
18634           Index = nullptr;
18635           break;
18636         }
18637         ChainedStores.push_back(STn);
18638         Index = STn;
18639         break;
18640       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
18641         NextInChain = Ldn->getChain().getNode();
18642         continue;
18643       } else {
18644         Index = nullptr;
18645         break;
18646       }
18647     }// end while
18648   }
18649
18650   // At this point, ChainedStores lists all of the Store nodes
18651   // reachable by iterating up through chain nodes matching the above
18652   // conditions.  For each such store identified, try to find an
18653   // earlier chain to attach the store to which won't violate the
18654   // required ordering.
18655   bool MadeChangeToSt = false;
18656   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
18657
18658   for (StoreSDNode *ChainedStore : ChainedStores) {
18659     SDValue Chain = ChainedStore->getChain();
18660     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
18661
18662     if (Chain != BetterChain) {
18663       if (ChainedStore == St)
18664         MadeChangeToSt = true;
18665       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
18666     }
18667   }
18668
18669   // Do all replacements after finding the replacements to make to avoid making
18670   // the chains more complicated by introducing new TokenFactors.
18671   for (auto Replacement : BetterChains)
18672     replaceStoreChain(Replacement.first, Replacement.second);
18673
18674   return MadeChangeToSt;
18675 }
18676
18677 /// This is the entry point for the file.
18678 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
18679                            CodeGenOpt::Level OptLevel) {
18680   /// This is the main entry point to this class.
18681   DAGCombiner(*this, AA, OptLevel).Run(Level);
18682 }