lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/CodeGen/DAGCombine.h"
  35 #include "llvm/CodeGen/ISDOpcodes.h"
  36 #include "llvm/CodeGen/MachineFrameInfo.h"
  37 #include "llvm/CodeGen/MachineFunction.h"
  38 #include "llvm/CodeGen/MachineMemOperand.h"
  39 #include "llvm/CodeGen/RuntimeLibcalls.h"
  40 #include "llvm/CodeGen/SelectionDAG.h"
  41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  42 #include "llvm/CodeGen/SelectionDAGNodes.h"
  43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  44 #include "llvm/CodeGen/TargetLowering.h"
  45 #include "llvm/CodeGen/TargetRegisterInfo.h"
  46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  47 #include "llvm/CodeGen/ValueTypes.h"
  48 #include "llvm/IR/Attributes.h"
  49 #include "llvm/IR/Constant.h"
  50 #include "llvm/IR/DataLayout.h"
  51 #include "llvm/IR/DerivedTypes.h"
  52 #include "llvm/IR/Function.h"
  53 #include "llvm/IR/LLVMContext.h"
  54 #include "llvm/IR/Metadata.h"
  55 #include "llvm/Support/Casting.h"
  56 #include "llvm/Support/CodeGen.h"
  57 #include "llvm/Support/CommandLine.h"
  58 #include "llvm/Support/Compiler.h"
  59 #include "llvm/Support/Debug.h"
  60 #include "llvm/Support/ErrorHandling.h"
  61 #include "llvm/Support/KnownBits.h"
  62 #include "llvm/Support/MachineValueType.h"
  63 #include "llvm/Support/MathExtras.h"
  64 #include "llvm/Support/raw_ostream.h"
  65 #include "llvm/Target/TargetMachine.h"
  66 #include "llvm/Target/TargetOptions.h"
  67 #include <algorithm>
  68 #include <cassert>
  69 #include <cstdint>
  70 #include <functional>
  71 #include <iterator>
  72 #include <string>
  73 #include <tuple>
  74 #include <utility>
  75
  76 using namespace llvm;
  77
  78 #define DEBUG_TYPE "dagcombine"
  79
  80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  83 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  85 STATISTIC(SlicedLoads, "Number of load sliced");
  86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  87
  88 static cl::opt<bool>
  89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  91
  92 static cl::opt<bool>
  93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  94         cl::desc("Enable DAG combiner's use of TBAA"));
  95
  96 #ifndef NDEBUG
  97 static cl::opt<std::string>
  98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  99                    cl::desc("Only use DAG-combiner alias analysis in this"
 100                             " function"));
 101 #endif
 102
 103 /// Hidden option to stress test load slicing, i.e., when this option
 104 /// is enabled, load slicing bypasses most of its profitability guards.
 105 static cl::opt<bool>
 106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 107                   cl::desc("Bypass the profitability model of load slicing"),
 108                   cl::init(false));
 109
 110 static cl::opt<bool>
 111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 112                     cl::desc("DAG combiner may split indexing from loads"));
 113
 114 namespace {
 115
 116   class DAGCombiner {
 117     SelectionDAG &DAG;
 118     const TargetLowering &TLI;
 119     CombineLevel Level;
 120     CodeGenOpt::Level OptLevel;
 121     bool LegalOperations = false;
 122     bool LegalTypes = false;
 123     bool ForCodeSize;
 124
 125     /// Worklist of all of the nodes that need to be simplified.
 126     ///
 127     /// This must behave as a stack -- new nodes to process are pushed onto the
 128     /// back and when processing we pop off of the back.
 129     ///
 130     /// The worklist will not contain duplicates but may contain null entries
 131     /// due to nodes being deleted from the underlying DAG.
 132     SmallVector<SDNode *, 64> Worklist;
 133
 134     /// Mapping from an SDNode to its position on the worklist.
 135     ///
 136     /// This is used to find and remove nodes from the worklist (by nulling
 137     /// them) when they are deleted from the underlying DAG. It relies on
 138     /// stable indices of nodes within the worklist.
 139     DenseMap<SDNode *, unsigned> WorklistMap;
 140
 141     /// Set of nodes which have been combined (at least once).
 142     ///
 143     /// This is used to allow us to reliably add any operands of a DAG node
 144     /// which have not yet been combined to the worklist.
 145     SmallPtrSet<SDNode *, 32> CombinedNodes;
 146
 147     // AA - Used for DAG load/store alias analysis.
 148     AliasAnalysis *AA;
 149
 150     /// When an instruction is simplified, add all users of the instruction to
 151     /// the work lists because they might get more simplified now.
 152     void AddUsersToWorklist(SDNode *N) {
 153       for (SDNode *Node : N->uses())
 154         AddToWorklist(Node);
 155     }
 156
 157     /// Call the node-specific routine that folds each particular type of node.
 158     SDValue visit(SDNode *N);
 159
 160   public:
 161     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 162         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 163           OptLevel(OL), AA(AA) {
 164       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
 165
 166       MaximumLegalStoreInBits = 0;
 167       for (MVT VT : MVT::all_valuetypes())
 168         if (EVT(VT).isSimple() && VT != MVT::Other &&
 169             TLI.isTypeLegal(EVT(VT)) &&
 170             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 171           MaximumLegalStoreInBits = VT.getSizeInBits();
 172     }
 173
 174     /// Add to the worklist making sure its instance is at the back (next to be
 175     /// processed.)
 176     void AddToWorklist(SDNode *N) {
 177       assert(N->getOpcode() != ISD::DELETED_NODE &&
 178              "Deleted Node added to Worklist");
 179
 180       // Skip handle nodes as they can't usefully be combined and confuse the
 181       // zero-use deletion strategy.
 182       if (N->getOpcode() == ISD::HANDLENODE)
 183         return;
 184
 185       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 186         Worklist.push_back(N);
 187     }
 188
 189     /// Remove all instances of N from the worklist.
 190     void removeFromWorklist(SDNode *N) {
 191       CombinedNodes.erase(N);
 192
 193       auto It = WorklistMap.find(N);
 194       if (It == WorklistMap.end())
 195         return; // Not in the worklist.
 196
 197       // Null out the entry rather than erasing it to avoid a linear operation.
 198       Worklist[It->second] = nullptr;
 199       WorklistMap.erase(It);
 200     }
 201
 202     void deleteAndRecombine(SDNode *N);
 203     bool recursivelyDeleteUnusedNodes(SDNode *N);
 204
 205     /// Replaces all uses of the results of one DAG node with new values.
 206     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 207                       bool AddTo = true);
 208
 209     /// Replaces all uses of the results of one DAG node with new values.
 210     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 211       return CombineTo(N, &Res, 1, AddTo);
 212     }
 213
 214     /// Replaces all uses of the results of one DAG node with new values.
 215     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 216                       bool AddTo = true) {
 217       SDValue To[] = { Res0, Res1 };
 218       return CombineTo(N, To, 2, AddTo);
 219     }
 220
 221     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 222
 223   private:
 224     unsigned MaximumLegalStoreInBits;
 225
 226     /// Check the specified integer node value to see if it can be simplified or
 227     /// if things it uses can be simplified by bit propagation.
 228     /// If so, return true.
 229     bool SimplifyDemandedBits(SDValue Op) {
 230       unsigned BitWidth = Op.getScalarValueSizeInBits();
 231       APInt Demanded = APInt::getAllOnesValue(BitWidth);
 232       return SimplifyDemandedBits(Op, Demanded);
 233     }
 234
 235     /// Check the specified vector node value to see if it can be simplified or
 236     /// if things it uses can be simplified as it only uses some of the
 237     /// elements. If so, return true.
 238     bool SimplifyDemandedVectorElts(SDValue Op) {
 239       unsigned NumElts = Op.getValueType().getVectorNumElements();
 240       APInt Demanded = APInt::getAllOnesValue(NumElts);
 241       return SimplifyDemandedVectorElts(Op, Demanded);
 242     }
 243
 244     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
 245     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
 246                                     bool AssumeSingleUse = false);
 247
 248     bool CombineToPreIndexedLoadStore(SDNode *N);
 249     bool CombineToPostIndexedLoadStore(SDNode *N);
 250     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 251     bool SliceUpLoad(SDNode *N);
 252
 253     // Scalars have size 0 to distinguish from singleton vectors.
 254     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 255     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 256     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 257
 258     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 259     ///   load.
 260     ///
 261     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 262     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 263     /// \param EltNo index of the vector element to load.
 264     /// \param OriginalLoad load that EVE came from to be replaced.
 265     /// \returns EVE on success SDValue() on failure.
 266     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 267                                          SDValue EltNo,
 268                                          LoadSDNode *OriginalLoad);
 269     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 270     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 271     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 272     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 273     SDValue PromoteIntBinOp(SDValue Op);
 274     SDValue PromoteIntShiftOp(SDValue Op);
 275     SDValue PromoteExtend(SDValue Op);
 276     bool PromoteLoad(SDValue Op);
 277
 278     /// Call the node-specific routine that knows how to fold each
 279     /// particular type of node. If that doesn't do anything, try the
 280     /// target-specific DAG combines.
 281     SDValue combine(SDNode *N);
 282
 283     // Visitation implementation - Implement dag node combining for different
 284     // node types.  The semantics are as follows:
 285     // Return Value:
 286     //   SDValue.getNode() == 0 - No change was made
 287     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 288     //   otherwise              - N should be replaced by the returned Operand.
 289     //
 290     SDValue visitTokenFactor(SDNode *N);
 291     SDValue visitMERGE_VALUES(SDNode *N);
 292     SDValue visitADD(SDNode *N);
 293     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
 294     SDValue visitSUB(SDNode *N);
 295     SDValue visitADDSAT(SDNode *N);
 296     SDValue visitSUBSAT(SDNode *N);
 297     SDValue visitADDC(SDNode *N);
 298     SDValue visitUADDO(SDNode *N);
 299     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 300     SDValue visitSUBC(SDNode *N);
 301     SDValue visitUSUBO(SDNode *N);
 302     SDValue visitADDE(SDNode *N);
 303     SDValue visitADDCARRY(SDNode *N);
 304     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 305     SDValue visitSUBE(SDNode *N);
 306     SDValue visitSUBCARRY(SDNode *N);
 307     SDValue visitMUL(SDNode *N);
 308     SDValue useDivRem(SDNode *N);
 309     SDValue visitSDIV(SDNode *N);
 310     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 311     SDValue visitUDIV(SDNode *N);
 312     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 313     SDValue visitREM(SDNode *N);
 314     SDValue visitMULHU(SDNode *N);
 315     SDValue visitMULHS(SDNode *N);
 316     SDValue visitSMUL_LOHI(SDNode *N);
 317     SDValue visitUMUL_LOHI(SDNode *N);
 318     SDValue visitSMULO(SDNode *N);
 319     SDValue visitUMULO(SDNode *N);
 320     SDValue visitIMINMAX(SDNode *N);
 321     SDValue visitAND(SDNode *N);
 322     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 323     SDValue visitOR(SDNode *N);
 324     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 325     SDValue visitXOR(SDNode *N);
 326     SDValue SimplifyVBinOp(SDNode *N);
 327     SDValue visitSHL(SDNode *N);
 328     SDValue visitSRA(SDNode *N);
 329     SDValue visitSRL(SDNode *N);
 330     SDValue visitFunnelShift(SDNode *N);
 331     SDValue visitRotate(SDNode *N);
 332     SDValue visitABS(SDNode *N);
 333     SDValue visitBSWAP(SDNode *N);
 334     SDValue visitBITREVERSE(SDNode *N);
 335     SDValue visitCTLZ(SDNode *N);
 336     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 337     SDValue visitCTTZ(SDNode *N);
 338     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 339     SDValue visitCTPOP(SDNode *N);
 340     SDValue visitSELECT(SDNode *N);
 341     SDValue visitVSELECT(SDNode *N);
 342     SDValue visitSELECT_CC(SDNode *N);
 343     SDValue visitSETCC(SDNode *N);
 344     SDValue visitSETCCCARRY(SDNode *N);
 345     SDValue visitSIGN_EXTEND(SDNode *N);
 346     SDValue visitZERO_EXTEND(SDNode *N);
 347     SDValue visitANY_EXTEND(SDNode *N);
 348     SDValue visitAssertExt(SDNode *N);
 349     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 350     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 351     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 352     SDValue visitTRUNCATE(SDNode *N);
 353     SDValue visitBITCAST(SDNode *N);
 354     SDValue visitBUILD_PAIR(SDNode *N);
 355     SDValue visitFADD(SDNode *N);
 356     SDValue visitFSUB(SDNode *N);
 357     SDValue visitFMUL(SDNode *N);
 358     SDValue visitFMA(SDNode *N);
 359     SDValue visitFDIV(SDNode *N);
 360     SDValue visitFREM(SDNode *N);
 361     SDValue visitFSQRT(SDNode *N);
 362     SDValue visitFCOPYSIGN(SDNode *N);
 363     SDValue visitFPOW(SDNode *N);
 364     SDValue visitSINT_TO_FP(SDNode *N);
 365     SDValue visitUINT_TO_FP(SDNode *N);
 366     SDValue visitFP_TO_SINT(SDNode *N);
 367     SDValue visitFP_TO_UINT(SDNode *N);
 368     SDValue visitFP_ROUND(SDNode *N);
 369     SDValue visitFP_ROUND_INREG(SDNode *N);
 370     SDValue visitFP_EXTEND(SDNode *N);
 371     SDValue visitFNEG(SDNode *N);
 372     SDValue visitFABS(SDNode *N);
 373     SDValue visitFCEIL(SDNode *N);
 374     SDValue visitFTRUNC(SDNode *N);
 375     SDValue visitFFLOOR(SDNode *N);
 376     SDValue visitFMINNUM(SDNode *N);
 377     SDValue visitFMAXNUM(SDNode *N);
 378     SDValue visitFMINIMUM(SDNode *N);
 379     SDValue visitFMAXIMUM(SDNode *N);
 380     SDValue visitBRCOND(SDNode *N);
 381     SDValue visitBR_CC(SDNode *N);
 382     SDValue visitLOAD(SDNode *N);
 383
 384     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 385     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 386
 387     SDValue visitSTORE(SDNode *N);
 388     SDValue visitLIFETIME_END(SDNode *N);
 389     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 390     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 391     SDValue visitBUILD_VECTOR(SDNode *N);
 392     SDValue visitCONCAT_VECTORS(SDNode *N);
 393     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 394     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 395     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 396     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 397     SDValue visitMLOAD(SDNode *N);
 398     SDValue visitMSTORE(SDNode *N);
 399     SDValue visitMGATHER(SDNode *N);
 400     SDValue visitMSCATTER(SDNode *N);
 401     SDValue visitFP_TO_FP16(SDNode *N);
 402     SDValue visitFP16_TO_FP(SDNode *N);
 403
 404     SDValue visitFADDForFMACombine(SDNode *N);
 405     SDValue visitFSUBForFMACombine(SDNode *N);
 406     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 407
 408     SDValue XformToShuffleWithZero(SDNode *N);
 409     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 410                            SDValue N1, SDNodeFlags Flags);
 411
 412     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
 413
 414     SDValue foldSelectOfConstants(SDNode *N);
 415     SDValue foldVSelectOfConstants(SDNode *N);
 416     SDValue foldBinOpIntoSelect(SDNode *BO);
 417     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 418     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 419     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 420     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 421                              SDValue N2, SDValue N3, ISD::CondCode CC,
 422                              bool NotExtCompare = false);
 423     SDValue convertSelectOfFPConstantsToLoadOffset(
 424         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 425         ISD::CondCode CC);
 426     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 427                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 428     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 429                               const SDLoc &DL);
 430     SDValue unfoldMaskedMerge(SDNode *N);
 431     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 432     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 433                           const SDLoc &DL, bool foldBooleans);
 434     SDValue rebuildSetCC(SDValue N);
 435
 436     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 437                            SDValue &CC) const;
 438     bool isOneUseSetCC(SDValue N) const;
 439
 440     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 441                                          unsigned HiOp);
 442     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 443     SDValue CombineExtLoad(SDNode *N);
 444     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 445     SDValue combineRepeatedFPDivisors(SDNode *N);
 446     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 447     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 448     SDValue BuildSDIV(SDNode *N);
 449     SDValue BuildSDIVPow2(SDNode *N);
 450     SDValue BuildUDIV(SDNode *N);
 451     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 452     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 453     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 454     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 455     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 456     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 457                                 SDNodeFlags Flags, bool Reciprocal);
 458     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 459                                 SDNodeFlags Flags, bool Reciprocal);
 460     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 461                                bool DemandHighBits = true);
 462     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 463     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 464                               SDValue InnerPos, SDValue InnerNeg,
 465                               unsigned PosOpcode, unsigned NegOpcode,
 466                               const SDLoc &DL);
 467     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 468     SDValue MatchLoadCombine(SDNode *N);
 469     SDValue ReduceLoadWidth(SDNode *N);
 470     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 471     SDValue splitMergedValStore(StoreSDNode *ST);
 472     SDValue TransformFPLoadStorePair(SDNode *N);
 473     SDValue convertBuildVecZextToZext(SDNode *N);
 474     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 475     SDValue reduceBuildVecToShuffle(SDNode *N);
 476     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 477                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 478                                   SDValue VecIn2, unsigned LeftIdx);
 479     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 480
 481     /// Walk up chain skipping non-aliasing memory nodes,
 482     /// looking for aliasing nodes and adding them to the Aliases vector.
 483     void GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
 484                           SmallVectorImpl<SDValue> &Aliases);
 485
 486     /// Return true if there is any possibility that the two addresses overlap.
 487     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
 488
 489     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 490     /// chain (aliasing node.)
 491     SDValue FindBetterChain(LSBaseSDNode *N, SDValue Chain);
 492
 493     /// Try to replace a store and any possibly adjacent stores on
 494     /// consecutive chains with better chains. Return true only if St is
 495     /// replaced.
 496     ///
 497     /// Notice that other chains may still be replaced even if the function
 498     /// returns false.
 499     bool findBetterNeighborChains(StoreSDNode *St);
 500
 501     // Helper for findBetterNeighborChains. Walk up store chain add additional
 502     // chained stores that do not overlap and can be parallelized.
 503     bool parallelizeChainedStores(StoreSDNode *St);
 504
 505     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 506     /// is located in a sequence of memory operations connected by a chain.
 507     struct MemOpLink {
 508       // Ptr to the mem node.
 509       LSBaseSDNode *MemNode;
 510
 511       // Offset from the base ptr.
 512       int64_t OffsetFromBase;
 513
 514       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 515           : MemNode(N), OffsetFromBase(Offset) {}
 516     };
 517
 518     /// This is a helper function for visitMUL to check the profitability
 519     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 520     /// MulNode is the original multiply, AddNode is (add x, c1),
 521     /// and ConstNode is c2.
 522     bool isMulAddWithConstProfitable(SDNode *MulNode,
 523                                      SDValue &AddNode,
 524                                      SDValue &ConstNode);
 525
 526     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 527     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 528     /// the type of the loaded value to be extended.
 529     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 530                           EVT LoadResultTy, EVT &ExtVT);
 531
 532     /// Helper function to calculate whether the given Load/Store can have its
 533     /// width reduced to ExtVT.
 534     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 535                            EVT &MemVT, unsigned ShAmt = 0);
 536
 537     /// Used by BackwardsPropagateMask to find suitable loads.
 538     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 539                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 540                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 541     /// Attempt to propagate a given AND node back to load leaves so that they
 542     /// can be combined into narrow loads.
 543     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 544
 545     /// Helper function for MergeConsecutiveStores which merges the
 546     /// component store chains.
 547     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 548                                 unsigned NumStores);
 549
 550     /// This is a helper function for MergeConsecutiveStores. When the
 551     /// source elements of the consecutive stores are all constants or
 552     /// all extracted vector elements, try to merge them into one
 553     /// larger store introducing bitcasts if necessary.  \return True
 554     /// if a merged store was created.
 555     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 556                                          EVT MemVT, unsigned NumStores,
 557                                          bool IsConstantSrc, bool UseVector,
 558                                          bool UseTrunc);
 559
 560     /// This is a helper function for MergeConsecutiveStores. Stores
 561     /// that potentially may be merged with St are placed in
 562     /// StoreNodes. RootNode is a chain predecessor to all store
 563     /// candidates.
 564     void getStoreMergeCandidates(StoreSDNode *St,
 565                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 566                                  SDNode *&Root);
 567
 568     /// Helper function for MergeConsecutiveStores. Checks if
 569     /// candidate stores have indirect dependency through their
 570     /// operands. RootNode is the predecessor to all stores calculated
 571     /// by getStoreMergeCandidates and is used to prune the dependency check.
 572     /// \return True if safe to merge.
 573     bool checkMergeStoreCandidatesForDependencies(
 574         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 575         SDNode *RootNode);
 576
 577     /// Merge consecutive store operations into a wide store.
 578     /// This optimization uses wide integers or vectors when possible.
 579     /// \return true if any stores were merged into a wider store, false
 580     /// otherwise (the result is a bool, not a count of merged stores).
 581     bool MergeConsecutiveStores(StoreSDNode *St);
 582
 583     /// Try to transform a truncation where C is a constant:
 584     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 585     ///
 586     /// \p N needs to be a truncation and its first operand an AND. Other
 587     /// requirements are checked by the function (e.g. that trunc is
 588     /// single-use) and if missed an empty SDValue is returned.
 589     SDValue distributeTruncateThroughAnd(SDNode *N);
 590
 591     /// Helper function to determine whether the target supports operation
 592     /// given by \p Opcode for type \p VT, that is, whether the operation
 593     /// is legal or custom before legalizing operations, and whether is
 594     /// legal (but not custom) after legalization.
 595     bool hasOperation(unsigned Opcode, EVT VT) {
 596       if (LegalOperations)
 597         return TLI.isOperationLegal(Opcode, VT);
 598       return TLI.isOperationLegalOrCustom(Opcode, VT);
 599     }
 600
 601   public:
 602     /// Runs the dag combiner on all nodes in the work list
 603     void Run(CombineLevel AtLevel);
 604
 605     SelectionDAG &getDAG() const { return DAG; }
 606
 607     /// Returns a type large enough to hold any valid shift amount - before type
 608     /// legalization these can be huge.
 609     EVT getShiftAmountTy(EVT LHSTy) {
 610       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 611       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 612     }
 613
 614     /// This method returns true if we are running before type legalization or
 615     /// if the specified VT is legal.
 616     bool isTypeLegal(const EVT &VT) {
 617       if (!LegalTypes) return true;
 618       return TLI.isTypeLegal(VT);
 619     }
 620
 621     /// Convenience wrapper around TargetLowering::getSetCCResultType
 622     EVT getSetCCResultType(EVT VT) const {
 623       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 624     }
 625
 626     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 627                          SDValue OrigLoad, SDValue ExtLoad,
 628                          ISD::NodeType ExtType);
 629   };
 630
 631 /// This class is a DAGUpdateListener that removes any deleted
 632 /// nodes from the worklist.
 633 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 634   DAGCombiner &DC;
 635
 636 public:
 637   explicit WorklistRemover(DAGCombiner &dc)
 638     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 639
 640   void NodeDeleted(SDNode *N, SDNode *E) override {
 641     DC.removeFromWorklist(N);
 642   }
 643 };
 644
 645 } // end anonymous namespace
 646
 647 //===----------------------------------------------------------------------===//
 648 //  TargetLowering::DAGCombinerInfo implementation
 649 //===----------------------------------------------------------------------===//
 650
 651 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 652   ((DAGCombiner*)DC)->AddToWorklist(N);
 653 }
 654
 655 SDValue TargetLowering::DAGCombinerInfo::
 656 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 657   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 658 }
 659
 660 SDValue TargetLowering::DAGCombinerInfo::
 661 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 662   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 663 }
 664
 665 SDValue TargetLowering::DAGCombinerInfo::
 666 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 667   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 668 }
 669
 670 void TargetLowering::DAGCombinerInfo::
 671 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 672   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 673 }
 674
 675 //===----------------------------------------------------------------------===//
 676 // Helper Functions
 677 //===----------------------------------------------------------------------===//
 678
 679 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 680   removeFromWorklist(N);
 681
 682   // If the operands of this node are only used by the node, they will now be
 683   // dead. Make sure to re-visit them and recursively delete dead nodes.
 684   for (const SDValue &Op : N->ops())
 685     // For an operand generating multiple values, one of the values may
 686     // become dead allowing further simplification (e.g. split index
 687     // arithmetic from an indexed load).
 688     if (Op->hasOneUse() || Op->getNumValues() > 1)
 689       AddToWorklist(Op.getNode());
 690
 691   DAG.DeleteNode(N);
 692 }
 693
 694 /// Return 1 if we can compute the negated form of the specified expression for
 695 /// the same cost as the expression itself, or 2 if we can compute the negated
 696 /// form more cheaply than the expression itself.
 697 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 698                                const TargetLowering &TLI,
 699                                const TargetOptions *Options,
 700                                unsigned Depth = 0) {
 701   // fneg is removable even if it has multiple uses.
 702   if (Op.getOpcode() == ISD::FNEG) return 2;
 703
 704   // Don't allow anything with multiple uses unless we know it is free.
 705   EVT VT = Op.getValueType();
 706   const SDNodeFlags Flags = Op->getFlags();
 707   if (!Op.hasOneUse())
 708     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
 709           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
 710       return 0;
 711
 712   // Don't recurse exponentially.
 713   if (Depth > 6) return 0;
 714
 715   switch (Op.getOpcode()) {
 716   default: return false;
 717   case ISD::ConstantFP: {
 718     if (!LegalOperations)
 719       return 1;
 720
 721     // Don't invert constant FP values after legalization unless the target says
 722     // the negated constant is legal.
 723     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 724       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
 725   }
 726   case ISD::FADD:
 727     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
 728       return 0;
 729
 730     // After operation legalization, it might not be legal to create new FSUBs.
 731     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
 732       return 0;
 733
 734     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 735     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 736                                     Options, Depth + 1))
 737       return V;
 738     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 739     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 740                               Depth + 1);
 741   case ISD::FSUB:
 742     // We can't turn -(A-B) into B-A when we honor signed zeros.
 743     if (!Options->NoSignedZerosFPMath &&
 744         !Flags.hasNoSignedZeros())
 745       return 0;
 746
 747     // fold (fneg (fsub A, B)) -> (fsub B, A)
 748     return 1;
 749
 750   case ISD::FMUL:
 751   case ISD::FDIV:
 752     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 753     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 754                                     Options, Depth + 1))
 755       return V;
 756
 757     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 758                               Depth + 1);
 759
 760   case ISD::FP_EXTEND:
 761   case ISD::FP_ROUND:
 762   case ISD::FSIN:
 763     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 764                               Depth + 1);
 765   }
 766 }
 767
 768 /// If isNegatibleForFree returns true, return the newly negated expression.
 769 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 770                                     bool LegalOperations, unsigned Depth = 0) {
 771   const TargetOptions &Options = DAG.getTarget().Options;
 772   // fneg is removable even if it has multiple uses.
 773   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
 774
 775   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 776
 777   const SDNodeFlags Flags = Op.getNode()->getFlags();
 778
 779   switch (Op.getOpcode()) {
 780   default: llvm_unreachable("Unknown code");
 781   case ISD::ConstantFP: {
 782     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 783     V.changeSign();
 784     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 785   }
 786   case ISD::FADD:
 787     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
 788
 789     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 790     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 791                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 792       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 793                          GetNegatedExpression(Op.getOperand(0), DAG,
 794                                               LegalOperations, Depth+1),
 795                          Op.getOperand(1), Flags);
 796     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 797     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 798                        GetNegatedExpression(Op.getOperand(1), DAG,
 799                                             LegalOperations, Depth+1),
 800                        Op.getOperand(0), Flags);
 801   case ISD::FSUB:
 802     // fold (fneg (fsub 0, B)) -> B
 803     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
 804       if (N0CFP->isZero())
 805         return Op.getOperand(1);
 806
 807     // fold (fneg (fsub A, B)) -> (fsub B, A)
 808     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 809                        Op.getOperand(1), Op.getOperand(0), Flags);
 810
 811   case ISD::FMUL:
 812   case ISD::FDIV:
 813     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 814     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 815                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
 816       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 817                          GetNegatedExpression(Op.getOperand(0), DAG,
 818                                               LegalOperations, Depth+1),
 819                          Op.getOperand(1), Flags);
 820
 821     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 822     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 823                        Op.getOperand(0),
 824                        GetNegatedExpression(Op.getOperand(1), DAG,
 825                                             LegalOperations, Depth+1), Flags);
 826
 827   case ISD::FP_EXTEND:
 828   case ISD::FSIN:
 829     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 830                        GetNegatedExpression(Op.getOperand(0), DAG,
 831                                             LegalOperations, Depth+1));
 832   case ISD::FP_ROUND:
 833       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
 834                          GetNegatedExpression(Op.getOperand(0), DAG,
 835                                               LegalOperations, Depth+1),
 836                          Op.getOperand(1));
 837   }
 838 }
 839
 840 // APInts must be the same size for most operations, this helper
 841 // function zero extends the shorter of the pair so that they match.
 842 // We provide an Offset so that we can create bitwidths that won't overflow.
 843 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 844   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 845   LHS = LHS.zextOrSelf(Bits);
 846   RHS = RHS.zextOrSelf(Bits);
 847 }
 848
 849 // Return true if this node is a setcc, or is a select_cc
 850 // that selects between the target values used for true and false, making it
 851 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 852 // the appropriate nodes based on the type of node we are checking. This
 853 // simplifies life a bit for the callers.
 854 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 855                                     SDValue &CC) const {
 856   if (N.getOpcode() == ISD::SETCC) {
 857     LHS = N.getOperand(0);
 858     RHS = N.getOperand(1);
 859     CC  = N.getOperand(2);
 860     return true;
 861   }
 862
 863   if (N.getOpcode() != ISD::SELECT_CC ||
 864       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 865       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 866     return false;
 867
 868   if (TLI.getBooleanContents(N.getValueType()) ==
 869       TargetLowering::UndefinedBooleanContent)
 870     return false;
 871
 872   LHS = N.getOperand(0);
 873   RHS = N.getOperand(1);
 874   CC  = N.getOperand(4);
 875   return true;
 876 }
 877
 878 /// Return true if this is a SetCC-equivalent operation with only one use.
 879 /// If this is true, it allows the users to invert the operation for free when
 880 /// it is profitable to do so.
 881 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 882   SDValue N0, N1, N2;
 883   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 884     return true;
 885   return false;
 886 }
 887
 888 // Returns the SDNode if it is a constant float BuildVector
 889 // or constant float.
 890 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 891   if (isa<ConstantFPSDNode>(N))
 892     return N.getNode();
 893   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 894     return N.getNode();
 895   return nullptr;
 896 }
 897
 898 // Determines if it is a constant integer or a build vector of constant
 899 // integers (and undefs).
 900 // Do not permit build vector implicit truncation.
 901 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 902   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 903     return !(Const->isOpaque() && NoOpaques);
 904   if (N.getOpcode() != ISD::BUILD_VECTOR)
 905     return false;
 906   unsigned BitWidth = N.getScalarValueSizeInBits();
 907   for (const SDValue &Op : N->op_values()) {
 908     if (Op.isUndef())
 909       continue;
 910     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 911     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 912         (Const->isOpaque() && NoOpaques))
 913       return false;
 914   }
 915   return true;
 916 }
 917
 918 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 919 // undef's.
 920 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 921   if (V.getOpcode() != ISD::BUILD_VECTOR)
 922     return false;
 923   return isConstantOrConstantVector(V, NoOpaques) ||
 924          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 925 }
 926
 927 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 928                                     SDValue N1, SDNodeFlags Flags) {
 929   // Don't reassociate reductions.
 930   if (Flags.hasVectorReduction())
 931     return SDValue();
 932
 933   EVT VT = N0.getValueType();
 934   if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
 935     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 936       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 937         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
 938         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
 939           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 940         return SDValue();
 941       }
 942       if (N0.hasOneUse()) {
 943         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
 944         // use
 945         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 946         if (!OpNode.getNode())
 947           return SDValue();
 948         AddToWorklist(OpNode.getNode());
 949         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 950       }
 951     }
 952   }
 953
 954   if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
 955     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
 956       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
 957         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
 958         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
 959           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
 960         return SDValue();
 961       }
 962       if (N1.hasOneUse()) {
 963         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
 964         // use
 965         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
 966         if (!OpNode.getNode())
 967           return SDValue();
 968         AddToWorklist(OpNode.getNode());
 969         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
 970       }
 971     }
 972   }
 973
 974   return SDValue();
 975 }
 976
 977 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 978                                bool AddTo) {
 979   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 980   ++NodesCombined;
 981   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
 982              To[0].getNode()->dump(&DAG);
 983              dbgs() << " and " << NumTo - 1 << " other values\n");
 984   for (unsigned i = 0, e = NumTo; i != e; ++i)
 985     assert((!To[i].getNode() ||
 986             N->getValueType(i) == To[i].getValueType()) &&
 987            "Cannot combine value to value of different type!");
 988
 989   WorklistRemover DeadNodes(*this);
 990   DAG.ReplaceAllUsesWith(N, To);
 991   if (AddTo) {
 992     // Push the new nodes and any users onto the worklist
 993     for (unsigned i = 0, e = NumTo; i != e; ++i) {
 994       if (To[i].getNode()) {
 995         AddToWorklist(To[i].getNode());
 996         AddUsersToWorklist(To[i].getNode());
 997       }
 998     }
 999   }
1000
1001   // Finally, if the node is now dead, remove it from the graph.  The node
1002   // may not be dead if the replacement process recursively simplified to
1003   // something else needing this node.
1004   if (N->use_empty())
1005     deleteAndRecombine(N);
1006   return SDValue(N, 0);
1007 }
1008
1009 void DAGCombiner::
1010 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1011   // Replace all uses.  If any nodes become isomorphic to other nodes and
1012   // are deleted, make sure to remove them from our worklist.
1013   WorklistRemover DeadNodes(*this);
1014   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1015
1016   // Push the new node and any (possibly new) users onto the worklist.
1017   AddToWorklist(TLO.New.getNode());
1018   AddUsersToWorklist(TLO.New.getNode());
1019
1020   // Finally, if the node is now dead, remove it from the graph.  The node
1021   // may not be dead if the replacement process recursively simplified to
1022   // something else needing this node.
1023   if (TLO.Old.getNode()->use_empty())
1024     deleteAndRecombine(TLO.Old.getNode());
1025 }
1026
1027 /// Check the specified integer node value to see if it can be simplified or if
1028 /// things it uses can be simplified by bit propagation. If so, return true.
1029 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1030   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1031   KnownBits Known;
1032   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1033     return false;
1034
1035   // Revisit the node.
1036   AddToWorklist(Op.getNode());
1037
1038   // Replace the old value with the new one.
1039   ++NodesCombined;
1040   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1041              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1042              dbgs() << '\n');
1043
1044   CommitTargetLoweringOpt(TLO);
1045   return true;
1046 }
1047
1048 /// Check the specified vector node value to see if it can be simplified or
1049 /// if things it uses can be simplified as it only uses some of the elements.
1050 /// If so, return true.
1051 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1052                                              bool AssumeSingleUse) {
1053   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1054   APInt KnownUndef, KnownZero;
1055   if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1056                                       0, AssumeSingleUse))
1057     return false;
1058
1059   // Revisit the node.
1060   AddToWorklist(Op.getNode());
1061
1062   // Replace the old value with the new one.
1063   ++NodesCombined;
1064   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1065              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1066              dbgs() << '\n');
1067
1068   CommitTargetLoweringOpt(TLO);
1069   return true;
1070 }
1071
1072 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1073   SDLoc DL(Load);
1074   EVT VT = Load->getValueType(0);
1075   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1076
1077   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1078              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1079   WorklistRemover DeadNodes(*this);
1080   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1081   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1082   deleteAndRecombine(Load);
1083   AddToWorklist(Trunc.getNode());
1084 }
1085
1086 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1087   Replace = false;
1088   SDLoc DL(Op);
1089   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1090     LoadSDNode *LD = cast<LoadSDNode>(Op);
1091     EVT MemVT = LD->getMemoryVT();
1092     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1093                                                       : LD->getExtensionType();
1094     Replace = true;
1095     return DAG.getExtLoad(ExtType, DL, PVT,
1096                           LD->getChain(), LD->getBasePtr(),
1097                           MemVT, LD->getMemOperand());
1098   }
1099
1100   unsigned Opc = Op.getOpcode();
1101   switch (Opc) {
1102   default: break;
1103   case ISD::AssertSext:
1104     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1105       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1106     break;
1107   case ISD::AssertZext:
1108     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1109       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1110     break;
1111   case ISD::Constant: {
1112     unsigned ExtOpc =
1113       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1114     return DAG.getNode(ExtOpc, DL, PVT, Op);
1115   }
1116   }
1117
1118   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1119     return SDValue();
1120   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1121 }
1122
1123 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1124   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1125     return SDValue();
1126   EVT OldVT = Op.getValueType();
1127   SDLoc DL(Op);
1128   bool Replace = false;
1129   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1130   if (!NewOp.getNode())
1131     return SDValue();
1132   AddToWorklist(NewOp.getNode());
1133
1134   if (Replace)
1135     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1136   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1137                      DAG.getValueType(OldVT));
1138 }
1139
1140 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1141   EVT OldVT = Op.getValueType();
1142   SDLoc DL(Op);
1143   bool Replace = false;
1144   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1145   if (!NewOp.getNode())
1146     return SDValue();
1147   AddToWorklist(NewOp.getNode());
1148
1149   if (Replace)
1150     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1151   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1152 }
1153
1154 /// Promote the specified integer binary operation if the target indicates it is
1155 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1156 /// i32 since i16 instructions are longer.
1157 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1158   if (!LegalOperations)
1159     return SDValue();
1160
1161   EVT VT = Op.getValueType();
1162   if (VT.isVector() || !VT.isInteger())
1163     return SDValue();
1164
1165   // If operation type is 'undesirable', e.g. i16 on x86, consider
1166   // promoting it.
1167   unsigned Opc = Op.getOpcode();
1168   if (TLI.isTypeDesirableForOp(Opc, VT))
1169     return SDValue();
1170
1171   EVT PVT = VT;
1172   // Consult target whether it is a good idea to promote this operation and
1173   // what's the right type to promote it to.
1174   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1175     assert(PVT != VT && "Don't know what type to promote to!");
1176
1177     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1178
1179     bool Replace0 = false;
1180     SDValue N0 = Op.getOperand(0);
1181     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1182
1183     bool Replace1 = false;
1184     SDValue N1 = Op.getOperand(1);
1185     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1186     SDLoc DL(Op);
1187
1188     SDValue RV =
1189         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1190
1191     // We are always replacing N0/N1's use in N and only need
1192     // additional replacements if there are additional uses.
1193     Replace0 &= !N0->hasOneUse();
1194     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1195
1196     // Combine Op here so it is preserved past replacements.
1197     CombineTo(Op.getNode(), RV);
1198
1199     // If operands have a use ordering, make sure we deal with
1200     // predecessor first.
1201     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1202       std::swap(N0, N1);
1203       std::swap(NN0, NN1);
1204     }
1205
1206     if (Replace0) {
1207       AddToWorklist(NN0.getNode());
1208       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1209     }
1210     if (Replace1) {
1211       AddToWorklist(NN1.getNode());
1212       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1213     }
1214     return Op;
1215   }
1216   return SDValue();
1217 }
1218
1219 /// Promote the specified integer shift operation if the target indicates it is
1220 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1221 /// i32 since i16 instructions are longer.
1222 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1223   if (!LegalOperations)
1224     return SDValue();
1225
1226   EVT VT = Op.getValueType();
1227   if (VT.isVector() || !VT.isInteger())
1228     return SDValue();
1229
1230   // If operation type is 'undesirable', e.g. i16 on x86, consider
1231   // promoting it.
1232   unsigned Opc = Op.getOpcode();
1233   if (TLI.isTypeDesirableForOp(Opc, VT))
1234     return SDValue();
1235
1236   EVT PVT = VT;
1237   // Consult target whether it is a good idea to promote this operation and
1238   // what's the right type to promote it to.
1239   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1240     assert(PVT != VT && "Don't know what type to promote to!");
1241
1242     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1243
1244     bool Replace = false;
1245     SDValue N0 = Op.getOperand(0);
1246     SDValue N1 = Op.getOperand(1);
1247     if (Opc == ISD::SRA)
1248       N0 = SExtPromoteOperand(N0, PVT);
1249     else if (Opc == ISD::SRL)
1250       N0 = ZExtPromoteOperand(N0, PVT);
1251     else
1252       N0 = PromoteOperand(N0, PVT, Replace);
1253
1254     if (!N0.getNode())
1255       return SDValue();
1256
1257     SDLoc DL(Op);
1258     SDValue RV =
1259         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1260
1261     AddToWorklist(N0.getNode());
1262     if (Replace)
1263       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1264
1265     // Deal with Op being deleted.
1266     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1267       return RV;
1268   }
1269   return SDValue();
1270 }
1271
1272 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1273   if (!LegalOperations)
1274     return SDValue();
1275
1276   EVT VT = Op.getValueType();
1277   if (VT.isVector() || !VT.isInteger())
1278     return SDValue();
1279
1280   // If operation type is 'undesirable', e.g. i16 on x86, consider
1281   // promoting it.
1282   unsigned Opc = Op.getOpcode();
1283   if (TLI.isTypeDesirableForOp(Opc, VT))
1284     return SDValue();
1285
1286   EVT PVT = VT;
1287   // Consult target whether it is a good idea to promote this operation and
1288   // what's the right type to promote it to.
1289   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1290     assert(PVT != VT && "Don't know what type to promote to!");
1291     // fold (aext (aext x)) -> (aext x)
1292     // fold (aext (zext x)) -> (zext x)
1293     // fold (aext (sext x)) -> (sext x)
1294     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1295     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1296   }
1297   return SDValue();
1298 }
1299
1300 bool DAGCombiner::PromoteLoad(SDValue Op) {
1301   if (!LegalOperations)
1302     return false;
1303
1304   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1305     return false;
1306
1307   EVT VT = Op.getValueType();
1308   if (VT.isVector() || !VT.isInteger())
1309     return false;
1310
1311   // If operation type is 'undesirable', e.g. i16 on x86, consider
1312   // promoting it.
1313   unsigned Opc = Op.getOpcode();
1314   if (TLI.isTypeDesirableForOp(Opc, VT))
1315     return false;
1316
1317   EVT PVT = VT;
1318   // Consult target whether it is a good idea to promote this operation and
1319   // what's the right type to promote it to.
1320   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1321     assert(PVT != VT && "Don't know what type to promote to!");
1322
1323     SDLoc DL(Op);
1324     SDNode *N = Op.getNode();
1325     LoadSDNode *LD = cast<LoadSDNode>(N);
1326     EVT MemVT = LD->getMemoryVT();
1327     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1328                                                       : LD->getExtensionType();
1329     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1330                                    LD->getChain(), LD->getBasePtr(),
1331                                    MemVT, LD->getMemOperand());
1332     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1333
1334     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1335                Result.getNode()->dump(&DAG); dbgs() << '\n');
1336     WorklistRemover DeadNodes(*this);
1337     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1338     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1339     deleteAndRecombine(N);
1340     AddToWorklist(Result.getNode());
1341     return true;
1342   }
1343   return false;
1344 }
1345
1346 /// Recursively delete a node which has no uses and any operands for
1347 /// which it is the only use.
1348 ///
1349 /// Note that this both deletes the nodes and removes them from the worklist.
1350 /// It also adds any nodes who have had a user deleted to the worklist as they
1351 /// may now have only one use and subject to other combines.
1352 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1353   if (!N->use_empty())
1354     return false;
1355
1356   SmallSetVector<SDNode *, 16> Nodes;
1357   Nodes.insert(N);
1358   do {
1359     N = Nodes.pop_back_val();
1360     if (!N)
1361       continue;
1362
1363     if (N->use_empty()) {
1364       for (const SDValue &ChildN : N->op_values())
1365         Nodes.insert(ChildN.getNode());
1366
1367       removeFromWorklist(N);
1368       DAG.DeleteNode(N);
1369     } else {
1370       AddToWorklist(N);
1371     }
1372   } while (!Nodes.empty());
1373   return true;
1374 }
1375
1376 //===----------------------------------------------------------------------===//
1377 //  Main DAG Combiner implementation
1378 //===----------------------------------------------------------------------===//
1379
1380 void DAGCombiner::Run(CombineLevel AtLevel) {
1381   // set the instance variables, so that the various visit routines may use it.
1382   Level = AtLevel;
1383   LegalOperations = Level >= AfterLegalizeVectorOps;
1384   LegalTypes = Level >= AfterLegalizeTypes;
1385
1386   // Add all the dag nodes to the worklist.
1387   for (SDNode &Node : DAG.allnodes())
1388     AddToWorklist(&Node);
1389
1390   // Create a dummy node (which is not added to allnodes), that adds a reference
1391   // to the root node, preventing it from being deleted, and tracking any
1392   // changes of the root.
1393   HandleSDNode Dummy(DAG.getRoot());
1394
1395   // While the worklist isn't empty, find a node and try to combine it.
1396   while (!WorklistMap.empty()) {
1397     SDNode *N;
1398     // The Worklist holds the SDNodes in order, but it may contain null entries.
1399     do {
1400       N = Worklist.pop_back_val();
1401     } while (!N);
1402
1403     bool GoodWorklistEntry = WorklistMap.erase(N);
1404     (void)GoodWorklistEntry;
1405     assert(GoodWorklistEntry &&
1406            "Found a worklist entry without a corresponding map entry!");
1407
1408     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1409     // N is deleted from the DAG, since they too may now be dead or may have a
1410     // reduced number of uses, allowing other xforms.
1411     if (recursivelyDeleteUnusedNodes(N))
1412       continue;
1413
1414     WorklistRemover DeadNodes(*this);
1415
1416     // If this combine is running after legalizing the DAG, re-legalize any
1417     // nodes pulled off the worklist.
1418     if (Level == AfterLegalizeDAG) {
1419       SmallSetVector<SDNode *, 16> UpdatedNodes;
1420       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1421
1422       for (SDNode *LN : UpdatedNodes) {
1423         AddToWorklist(LN);
1424         AddUsersToWorklist(LN);
1425       }
1426       if (!NIsValid)
1427         continue;
1428     }
1429
1430     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1431
1432     // Add any operands of the new node which have not yet been combined to the
1433     // worklist as well. Because the worklist uniques things already, this
1434     // won't repeatedly process the same operand.
1435     CombinedNodes.insert(N);
1436     for (const SDValue &ChildN : N->op_values())
1437       if (!CombinedNodes.count(ChildN.getNode()))
1438         AddToWorklist(ChildN.getNode());
1439
1440     SDValue RV = combine(N);
1441
1442     if (!RV.getNode())
1443       continue;
1444
1445     ++NodesCombined;
1446
1447     // If we get back the same node we passed in, rather than a new node or
1448     // zero, we know that the node must have defined multiple values and
1449     // CombineTo was used.  Since CombineTo takes care of the worklist
1450     // mechanics for us, we have no work to do in this case.
1451     if (RV.getNode() == N)
1452       continue;
1453
1454     assert(N->getOpcode() != ISD::DELETED_NODE &&
1455            RV.getOpcode() != ISD::DELETED_NODE &&
1456            "Node was deleted but visit returned new node!");
1457
1458     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1459
1460     if (N->getNumValues() == RV.getNode()->getNumValues())
1461       DAG.ReplaceAllUsesWith(N, RV.getNode());
1462     else {
1463       assert(N->getValueType(0) == RV.getValueType() &&
1464              N->getNumValues() == 1 && "Type mismatch");
1465       DAG.ReplaceAllUsesWith(N, &RV);
1466     }
1467
1468     // Push the new node and any users onto the worklist
1469     AddToWorklist(RV.getNode());
1470     AddUsersToWorklist(RV.getNode());
1471
1472     // Finally, if the node is now dead, remove it from the graph.  The node
1473     // may not be dead if the replacement process recursively simplified to
1474     // something else needing this node. This will also take care of adding any
1475     // operands which have lost a user to the worklist.
1476     recursivelyDeleteUnusedNodes(N);
1477   }
1478
1479   // If the root changed (e.g. it was a dead load, update the root).
1480   DAG.setRoot(Dummy.getValue());
1481   DAG.RemoveDeadNodes();
1482 }
1483
1484 SDValue DAGCombiner::visit(SDNode *N) {
1485   switch (N->getOpcode()) {
1486   default: break;
1487   case ISD::TokenFactor:        return visitTokenFactor(N);
1488   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1489   case ISD::ADD:                return visitADD(N);
1490   case ISD::SUB:                return visitSUB(N);
1491   case ISD::SADDSAT:
1492   case ISD::UADDSAT:            return visitADDSAT(N);
1493   case ISD::SSUBSAT:
1494   case ISD::USUBSAT:            return visitSUBSAT(N);
1495   case ISD::ADDC:               return visitADDC(N);
1496   case ISD::UADDO:              return visitUADDO(N);
1497   case ISD::SUBC:               return visitSUBC(N);
1498   case ISD::USUBO:              return visitUSUBO(N);
1499   case ISD::ADDE:               return visitADDE(N);
1500   case ISD::ADDCARRY:           return visitADDCARRY(N);
1501   case ISD::SUBE:               return visitSUBE(N);
1502   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1503   case ISD::MUL:                return visitMUL(N);
1504   case ISD::SDIV:               return visitSDIV(N);
1505   case ISD::UDIV:               return visitUDIV(N);
1506   case ISD::SREM:
1507   case ISD::UREM:               return visitREM(N);
1508   case ISD::MULHU:              return visitMULHU(N);
1509   case ISD::MULHS:              return visitMULHS(N);
1510   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1511   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1512   case ISD::SMULO:              return visitSMULO(N);
1513   case ISD::UMULO:              return visitUMULO(N);
1514   case ISD::SMIN:
1515   case ISD::SMAX:
1516   case ISD::UMIN:
1517   case ISD::UMAX:               return visitIMINMAX(N);
1518   case ISD::AND:                return visitAND(N);
1519   case ISD::OR:                 return visitOR(N);
1520   case ISD::XOR:                return visitXOR(N);
1521   case ISD::SHL:                return visitSHL(N);
1522   case ISD::SRA:                return visitSRA(N);
1523   case ISD::SRL:                return visitSRL(N);
1524   case ISD::ROTR:
1525   case ISD::ROTL:               return visitRotate(N);
1526   case ISD::FSHL:
1527   case ISD::FSHR:               return visitFunnelShift(N);
1528   case ISD::ABS:                return visitABS(N);
1529   case ISD::BSWAP:              return visitBSWAP(N);
1530   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1531   case ISD::CTLZ:               return visitCTLZ(N);
1532   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1533   case ISD::CTTZ:               return visitCTTZ(N);
1534   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1535   case ISD::CTPOP:              return visitCTPOP(N);
1536   case ISD::SELECT:             return visitSELECT(N);
1537   case ISD::VSELECT:            return visitVSELECT(N);
1538   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1539   case ISD::SETCC:              return visitSETCC(N);
1540   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1541   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1542   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1543   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1544   case ISD::AssertSext:
1545   case ISD::AssertZext:         return visitAssertExt(N);
1546   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1547   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1548   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1549   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1550   case ISD::BITCAST:            return visitBITCAST(N);
1551   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1552   case ISD::FADD:               return visitFADD(N);
1553   case ISD::FSUB:               return visitFSUB(N);
1554   case ISD::FMUL:               return visitFMUL(N);
1555   case ISD::FMA:                return visitFMA(N);
1556   case ISD::FDIV:               return visitFDIV(N);
1557   case ISD::FREM:               return visitFREM(N);
1558   case ISD::FSQRT:              return visitFSQRT(N);
1559   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1560   case ISD::FPOW:               return visitFPOW(N);
1561   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1562   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1563   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1564   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1565   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1566   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1567   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1568   case ISD::FNEG:               return visitFNEG(N);
1569   case ISD::FABS:               return visitFABS(N);
1570   case ISD::FFLOOR:             return visitFFLOOR(N);
1571   case ISD::FMINNUM:            return visitFMINNUM(N);
1572   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1573   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1574   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1575   case ISD::FCEIL:              return visitFCEIL(N);
1576   case ISD::FTRUNC:             return visitFTRUNC(N);
1577   case ISD::BRCOND:             return visitBRCOND(N);
1578   case ISD::BR_CC:              return visitBR_CC(N);
1579   case ISD::LOAD:               return visitLOAD(N);
1580   case ISD::STORE:              return visitSTORE(N);
1581   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1582   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1583   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1584   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1585   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1586   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1587   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1588   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1589   case ISD::MGATHER:            return visitMGATHER(N);
1590   case ISD::MLOAD:              return visitMLOAD(N);
1591   case ISD::MSCATTER:           return visitMSCATTER(N);
1592   case ISD::MSTORE:             return visitMSTORE(N);
1593   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1594   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1595   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1596   }
1597   return SDValue();
1598 }
1599
1600 SDValue DAGCombiner::combine(SDNode *N) {
1601   SDValue RV = visit(N);
1602
1603   // If nothing happened, try a target-specific DAG combine.
1604   if (!RV.getNode()) {
1605     assert(N->getOpcode() != ISD::DELETED_NODE &&
1606            "Node was deleted but visit returned NULL!");
1607
1608     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1609         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1610
1611       // Expose the DAG combiner to the target combiner impls.
1612       TargetLowering::DAGCombinerInfo
1613         DagCombineInfo(DAG, Level, false, this);
1614
1615       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1616     }
1617   }
1618
1619   // If nothing happened still, try promoting the operation.
1620   if (!RV.getNode()) {
1621     switch (N->getOpcode()) {
1622     default: break;
1623     case ISD::ADD:
1624     case ISD::SUB:
1625     case ISD::MUL:
1626     case ISD::AND:
1627     case ISD::OR:
1628     case ISD::XOR:
1629       RV = PromoteIntBinOp(SDValue(N, 0));
1630       break;
1631     case ISD::SHL:
1632     case ISD::SRA:
1633     case ISD::SRL:
1634       RV = PromoteIntShiftOp(SDValue(N, 0));
1635       break;
1636     case ISD::SIGN_EXTEND:
1637     case ISD::ZERO_EXTEND:
1638     case ISD::ANY_EXTEND:
1639       RV = PromoteExtend(SDValue(N, 0));
1640       break;
1641     case ISD::LOAD:
1642       if (PromoteLoad(SDValue(N, 0)))
1643         RV = SDValue(N, 0);
1644       break;
1645     }
1646   }
1647
1648   // If N is a commutative binary node, try eliminate it if the commuted
1649   // version is already present in the DAG.
1650   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1651       N->getNumValues() == 1) {
1652     SDValue N0 = N->getOperand(0);
1653     SDValue N1 = N->getOperand(1);
1654
1655     // Constant operands are canonicalized to RHS.
1656     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1657       SDValue Ops[] = {N1, N0};
1658       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1659                                             N->getFlags());
1660       if (CSENode)
1661         return SDValue(CSENode, 0);
1662     }
1663   }
1664
1665   return RV;
1666 }
1667
1668 /// Given a node, return its input chain if it has one, otherwise return a null
1669 /// sd operand.
1670 static SDValue getInputChainForNode(SDNode *N) {
1671   if (unsigned NumOps = N->getNumOperands()) {
1672     if (N->getOperand(0).getValueType() == MVT::Other)
1673       return N->getOperand(0);
1674     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1675       return N->getOperand(NumOps-1);
1676     for (unsigned i = 1; i < NumOps-1; ++i)
1677       if (N->getOperand(i).getValueType() == MVT::Other)
1678         return N->getOperand(i);
1679   }
1680   return SDValue();
1681 }
1682
1683 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1684   // If N has two operands, where one has an input chain equal to the other,
1685   // the 'other' chain is redundant.
1686   if (N->getNumOperands() == 2) {
1687     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1688       return N->getOperand(0);
1689     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1690       return N->getOperand(1);
1691   }
1692
1693   // Don't simplify token factors if optnone.
1694   if (OptLevel == CodeGenOpt::None)
1695     return SDValue();
1696
1697   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1698   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1699   SmallPtrSet<SDNode*, 16> SeenOps;
1700   bool Changed = false;             // If we should replace this token factor.
1701
1702   // Start out with this token factor.
1703   TFs.push_back(N);
1704
1705   // Iterate through token factors.  The TFs grows when new token factors are
1706   // encountered.
1707   for (unsigned i = 0; i < TFs.size(); ++i) {
1708     SDNode *TF = TFs[i];
1709
1710     // Check each of the operands.
1711     for (const SDValue &Op : TF->op_values()) {
1712       switch (Op.getOpcode()) {
1713       case ISD::EntryToken:
1714         // Entry tokens don't need to be added to the list. They are
1715         // redundant.
1716         Changed = true;
1717         break;
1718
1719       case ISD::TokenFactor:
1720         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1721           // Queue up for processing.
1722           TFs.push_back(Op.getNode());
1723           // Clean up in case the token factor is removed.
1724           AddToWorklist(Op.getNode());
1725           Changed = true;
1726           break;
1727         }
1728         LLVM_FALLTHROUGH;
1729
1730       default:
1731         // Only add if it isn't already in the list.
1732         if (SeenOps.insert(Op.getNode()).second)
1733           Ops.push_back(Op);
1734         else
1735           Changed = true;
1736         break;
1737       }
1738     }
1739   }
1740
1741   // Remove Nodes that are chained to another node in the list. Do so
1742   // by walking up chains breath-first stopping when we've seen
1743   // another operand. In general we must climb to the EntryNode, but we can exit
1744   // early if we find all remaining work is associated with just one operand as
1745   // no further pruning is possible.
1746
1747   // List of nodes to search through and original Ops from which they originate.
1748   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1749   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1750   SmallPtrSet<SDNode *, 16> SeenChains;
1751   bool DidPruneOps = false;
1752
1753   unsigned NumLeftToConsider = 0;
1754   for (const SDValue &Op : Ops) {
1755     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1756     OpWorkCount.push_back(1);
1757   }
1758
1759   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1760     // If this is an Op, we can remove the op from the list. Remark any
1761     // search associated with it as from the current OpNumber.
1762     if (SeenOps.count(Op) != 0) {
1763       Changed = true;
1764       DidPruneOps = true;
1765       unsigned OrigOpNumber = 0;
1766       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1767         OrigOpNumber++;
1768       assert((OrigOpNumber != Ops.size()) &&
1769              "expected to find TokenFactor Operand");
1770       // Re-mark worklist from OrigOpNumber to OpNumber
1771       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1772         if (Worklist[i].second == OrigOpNumber) {
1773           Worklist[i].second = OpNumber;
1774         }
1775       }
1776       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1777       OpWorkCount[OrigOpNumber] = 0;
1778       NumLeftToConsider--;
1779     }
1780     // Add if it's a new chain
1781     if (SeenChains.insert(Op).second) {
1782       OpWorkCount[OpNumber]++;
1783       Worklist.push_back(std::make_pair(Op, OpNumber));
1784     }
1785   };
1786
1787   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1788     // We need at least be consider at least 2 Ops to prune.
1789     if (NumLeftToConsider <= 1)
1790       break;
1791     auto CurNode = Worklist[i].first;
1792     auto CurOpNumber = Worklist[i].second;
1793     assert((OpWorkCount[CurOpNumber] > 0) &&
1794            "Node should not appear in worklist");
1795     switch (CurNode->getOpcode()) {
1796     case ISD::EntryToken:
1797       // Hitting EntryToken is the only way for the search to terminate without
1798       // hitting
1799       // another operand's search. Prevent us from marking this operand
1800       // considered.
1801       NumLeftToConsider++;
1802       break;
1803     case ISD::TokenFactor:
1804       for (const SDValue &Op : CurNode->op_values())
1805         AddToWorklist(i, Op.getNode(), CurOpNumber);
1806       break;
1807     case ISD::CopyFromReg:
1808     case ISD::CopyToReg:
1809       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1810       break;
1811     default:
1812       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1813         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1814       break;
1815     }
1816     OpWorkCount[CurOpNumber]--;
1817     if (OpWorkCount[CurOpNumber] == 0)
1818       NumLeftToConsider--;
1819   }
1820
1821   // If we've changed things around then replace token factor.
1822   if (Changed) {
1823     SDValue Result;
1824     if (Ops.empty()) {
1825       // The entry token is the only possible outcome.
1826       Result = DAG.getEntryNode();
1827     } else {
1828       if (DidPruneOps) {
1829         SmallVector<SDValue, 8> PrunedOps;
1830         //
1831         for (const SDValue &Op : Ops) {
1832           if (SeenChains.count(Op.getNode()) == 0)
1833             PrunedOps.push_back(Op);
1834         }
1835         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1836       } else {
1837         Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1838       }
1839     }
1840     return Result;
1841   }
1842   return SDValue();
1843 }
1844
1845 /// MERGE_VALUES can always be eliminated.
1846 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1847   WorklistRemover DeadNodes(*this);
1848   // Replacing results may cause a different MERGE_VALUES to suddenly
1849   // be CSE'd with N, and carry its uses with it. Iterate until no
1850   // uses remain, to ensure that the node can be safely deleted.
1851   // First add the users of this node to the work list so that they
1852   // can be tried again once they have new operands.
1853   AddUsersToWorklist(N);
1854   do {
1855     // Do as a single replacement to avoid rewalking use lists.
1856     SmallVector<SDValue, 8> Ops;
1857     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1858       Ops.push_back(N->getOperand(i));
1859     DAG.ReplaceAllUsesWith(N, Ops.data());
1860   } while (!N->use_empty());
1861   deleteAndRecombine(N);
1862   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1863 }
1864
1865 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1866 /// ConstantSDNode pointer else nullptr.
1867 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1868   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1869   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1870 }
1871
1872 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1873   assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1874
1875   // Don't do this unless the old select is going away. We want to eliminate the
1876   // binary operator, not replace a binop with a select.
1877   // TODO: Handle ISD::SELECT_CC.
1878   unsigned SelOpNo = 0;
1879   SDValue Sel = BO->getOperand(0);
1880   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1881     SelOpNo = 1;
1882     Sel = BO->getOperand(1);
1883   }
1884
1885   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1886     return SDValue();
1887
1888   SDValue CT = Sel.getOperand(1);
1889   if (!isConstantOrConstantVector(CT, true) &&
1890       !isConstantFPBuildVectorOrConstantFP(CT))
1891     return SDValue();
1892
1893   SDValue CF = Sel.getOperand(2);
1894   if (!isConstantOrConstantVector(CF, true) &&
1895       !isConstantFPBuildVectorOrConstantFP(CF))
1896     return SDValue();
1897
1898   // Bail out if any constants are opaque because we can't constant fold those.
1899   // The exception is "and" and "or" with either 0 or -1 in which case we can
1900   // propagate non constant operands into select. I.e.:
1901   // and (select Cond, 0, -1), X --> select Cond, 0, X
1902   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1903   auto BinOpcode = BO->getOpcode();
1904   bool CanFoldNonConst =
1905       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1906       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1907       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1908
1909   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1910   if (!CanFoldNonConst &&
1911       !isConstantOrConstantVector(CBO, true) &&
1912       !isConstantFPBuildVectorOrConstantFP(CBO))
1913     return SDValue();
1914
1915   EVT VT = Sel.getValueType();
1916
1917   // In case of shift value and shift amount may have different VT. For instance
1918   // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1919   // swapped operands and value types do not match. NB: x86 is fine if operands
1920   // are not swapped with shift amount VT being not bigger than shifted value.
1921   // TODO: that is possible to check for a shift operation, correct VTs and
1922   // still perform optimization on x86 if needed.
1923   if (SelOpNo && VT != CBO.getValueType())
1924     return SDValue();
1925
1926   // We have a select-of-constants followed by a binary operator with a
1927   // constant. Eliminate the binop by pulling the constant math into the select.
1928   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1929   SDLoc DL(Sel);
1930   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1931                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1932   if (!CanFoldNonConst && !NewCT.isUndef() &&
1933       !isConstantOrConstantVector(NewCT, true) &&
1934       !isConstantFPBuildVectorOrConstantFP(NewCT))
1935     return SDValue();
1936
1937   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1938                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1939   if (!CanFoldNonConst && !NewCF.isUndef() &&
1940       !isConstantOrConstantVector(NewCF, true) &&
1941       !isConstantFPBuildVectorOrConstantFP(NewCF))
1942     return SDValue();
1943
1944   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1945 }
1946
1947 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1948   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1949          "Expecting add or sub");
1950
1951   // Match a constant operand and a zext operand for the math instruction:
1952   // add Z, C
1953   // sub C, Z
1954   bool IsAdd = N->getOpcode() == ISD::ADD;
1955   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1956   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1957   auto *CN = dyn_cast<ConstantSDNode>(C);
1958   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1959     return SDValue();
1960
1961   // Match the zext operand as a setcc of a boolean.
1962   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1963       Z.getOperand(0).getValueType() != MVT::i1)
1964     return SDValue();
1965
1966   // Match the compare as: setcc (X & 1), 0, eq.
1967   SDValue SetCC = Z.getOperand(0);
1968   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1969   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1970       SetCC.getOperand(0).getOpcode() != ISD::AND ||
1971       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1972     return SDValue();
1973
1974   // We are adding/subtracting a constant and an inverted low bit. Turn that
1975   // into a subtract/add of the low bit with incremented/decremented constant:
1976   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1977   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1978   EVT VT = C.getValueType();
1979   SDLoc DL(N);
1980   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1981   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1982                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1983   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1984 }
1985
1986 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1987 /// a shift and add with a different constant.
1988 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1989   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1990          "Expecting add or sub");
1991
1992   // We need a constant operand for the add/sub, and the other operand is a
1993   // logical shift right: add (srl), C or sub C, (srl).
1994   bool IsAdd = N->getOpcode() == ISD::ADD;
1995   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1996   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1997   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1998   if (!C || ShiftOp.getOpcode() != ISD::SRL)
1999     return SDValue();
2000
2001   // The shift must be of a 'not' value.
2002   SDValue Not = ShiftOp.getOperand(0);
2003   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2004     return SDValue();
2005
2006   // The shift must be moving the sign bit to the least-significant-bit.
2007   EVT VT = ShiftOp.getValueType();
2008   SDValue ShAmt = ShiftOp.getOperand(1);
2009   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2010   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2011     return SDValue();
2012
2013   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2014   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2015   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2016   SDLoc DL(N);
2017   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2018   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2019   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2020   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2021 }
2022
2023 SDValue DAGCombiner::visitADD(SDNode *N) {
2024   SDValue N0 = N->getOperand(0);
2025   SDValue N1 = N->getOperand(1);
2026   EVT VT = N0.getValueType();
2027   SDLoc DL(N);
2028
2029   // fold vector ops
2030   if (VT.isVector()) {
2031     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2032       return FoldedVOp;
2033
2034     // fold (add x, 0) -> x, vector edition
2035     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2036       return N0;
2037     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2038       return N1;
2039   }
2040
2041   // fold (add x, undef) -> undef
2042   if (N0.isUndef())
2043     return N0;
2044
2045   if (N1.isUndef())
2046     return N1;
2047
2048   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2049     // canonicalize constant to RHS
2050     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2051       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2052     // fold (add c1, c2) -> c1+c2
2053     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2054                                       N1.getNode());
2055   }
2056
2057   // fold (add x, 0) -> x
2058   if (isNullConstant(N1))
2059     return N0;
2060
2061   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2062     // fold ((c1-A)+c2) -> (c1+c2)-A
2063     if (N0.getOpcode() == ISD::SUB &&
2064         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2065       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2066       return DAG.getNode(ISD::SUB, DL, VT,
2067                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2068                          N0.getOperand(1));
2069     }
2070
2071     // add (sext i1 X), 1 -> zext (not i1 X)
2072     // We don't transform this pattern:
2073     //   add (zext i1 X), -1 -> sext (not i1 X)
2074     // because most (?) targets generate better code for the zext form.
2075     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2076         isOneOrOneSplat(N1)) {
2077       SDValue X = N0.getOperand(0);
2078       if ((!LegalOperations ||
2079            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2080             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2081           X.getScalarValueSizeInBits() == 1) {
2082         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2083         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2084       }
2085     }
2086
2087     // Undo the add -> or combine to merge constant offsets from a frame index.
2088     if (N0.getOpcode() == ISD::OR &&
2089         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2090         isa<ConstantSDNode>(N0.getOperand(1)) &&
2091         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2092       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2093       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2094     }
2095   }
2096
2097   if (SDValue NewSel = foldBinOpIntoSelect(N))
2098     return NewSel;
2099
2100   // reassociate add
2101   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2102     return RADD;
2103
2104   // fold ((0-A) + B) -> B-A
2105   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2106     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2107
2108   // fold (A + (0-B)) -> A-B
2109   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2110     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2111
2112   // fold (A+(B-A)) -> B
2113   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2114     return N1.getOperand(0);
2115
2116   // fold ((B-A)+A) -> B
2117   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2118     return N0.getOperand(0);
2119
2120   // fold ((A-B)+(C-A)) -> (C-B)
2121   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2122       N0.getOperand(0) == N1.getOperand(1))
2123     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2124                        N0.getOperand(1));
2125
2126   // fold ((A-B)+(B-C)) -> (A-C)
2127   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2128       N0.getOperand(1) == N1.getOperand(0))
2129     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2130                        N1.getOperand(1));
2131
2132   // fold (A+(B-(A+C))) to (B-C)
2133   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2134       N0 == N1.getOperand(1).getOperand(0))
2135     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2136                        N1.getOperand(1).getOperand(1));
2137
2138   // fold (A+(B-(C+A))) to (B-C)
2139   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2140       N0 == N1.getOperand(1).getOperand(1))
2141     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2142                        N1.getOperand(1).getOperand(0));
2143
2144   // fold (A+((B-A)+or-C)) to (B+or-C)
2145   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2146       N1.getOperand(0).getOpcode() == ISD::SUB &&
2147       N0 == N1.getOperand(0).getOperand(1))
2148     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2149                        N1.getOperand(1));
2150
2151   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2152   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2153     SDValue N00 = N0.getOperand(0);
2154     SDValue N01 = N0.getOperand(1);
2155     SDValue N10 = N1.getOperand(0);
2156     SDValue N11 = N1.getOperand(1);
2157
2158     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2159       return DAG.getNode(ISD::SUB, DL, VT,
2160                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2161                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2162   }
2163
2164   // fold (add (umax X, C), -C) --> (usubsat X, C)
2165   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2166     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2167       return (!Max && !Op) ||
2168              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2169     };
2170     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2171                                   /*AllowUndefs*/ true))
2172       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2173                          N0.getOperand(1));
2174   }
2175
2176   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2177     return V;
2178
2179   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2180     return V;
2181
2182   if (SimplifyDemandedBits(SDValue(N, 0)))
2183     return SDValue(N, 0);
2184
2185   // fold (a+b) -> (a|b) iff a and b share no bits.
2186   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2187       DAG.haveNoCommonBitsSet(N0, N1))
2188     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2189
2190   // fold (add (xor a, -1), 1) -> (sub 0, a)
2191   if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
2192     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2193                        N0.getOperand(0));
2194
2195   if (SDValue Combined = visitADDLike(N0, N1, N))
2196     return Combined;
2197
2198   if (SDValue Combined = visitADDLike(N1, N0, N))
2199     return Combined;
2200
2201   return SDValue();
2202 }
2203
2204 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2205   unsigned Opcode = N->getOpcode();
2206   SDValue N0 = N->getOperand(0);
2207   SDValue N1 = N->getOperand(1);
2208   EVT VT = N0.getValueType();
2209   SDLoc DL(N);
2210
2211   // fold vector ops
2212   if (VT.isVector()) {
2213     // TODO SimplifyVBinOp
2214
2215     // fold (add_sat x, 0) -> x, vector edition
2216     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2217       return N0;
2218     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2219       return N1;
2220   }
2221
2222   // fold (add_sat x, undef) -> -1
2223   if (N0.isUndef() || N1.isUndef())
2224     return DAG.getAllOnesConstant(DL, VT);
2225
2226   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2227     // canonicalize constant to RHS
2228     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2229       return DAG.getNode(Opcode, DL, VT, N1, N0);
2230     // fold (add_sat c1, c2) -> c3
2231     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2232                                       N1.getNode());
2233   }
2234
2235   // fold (add_sat x, 0) -> x
2236   if (isNullConstant(N1))
2237     return N0;
2238
2239   // If it cannot overflow, transform into an add.
2240   if (Opcode == ISD::UADDSAT)
2241     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2242       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2243
2244   return SDValue();
2245 }
2246
2247 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2248   bool Masked = false;
2249
2250   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2251   while (true) {
2252     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2253       V = V.getOperand(0);
2254       continue;
2255     }
2256
2257     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2258       Masked = true;
2259       V = V.getOperand(0);
2260       continue;
2261     }
2262
2263     break;
2264   }
2265
2266   // If this is not a carry, return.
2267   if (V.getResNo() != 1)
2268     return SDValue();
2269
2270   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2271       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2272     return SDValue();
2273
2274   // If the result is masked, then no matter what kind of bool it is we can
2275   // return. If it isn't, then we need to make sure the bool type is either 0 or
2276   // 1 and not other values.
2277   if (Masked ||
2278       TLI.getBooleanContents(V.getValueType()) ==
2279           TargetLoweringBase::ZeroOrOneBooleanContent)
2280     return V;
2281
2282   return SDValue();
2283 }
2284
2285 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2286 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2287 /// the opcode and bypass the mask operation.
2288 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2289                                  SelectionDAG &DAG, const SDLoc &DL) {
2290   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2291     return SDValue();
2292
2293   EVT VT = N0.getValueType();
2294   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2295     return SDValue();
2296
2297   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2298   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2299   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2300 }
2301
2302 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2303   EVT VT = N0.getValueType();
2304   SDLoc DL(LocReference);
2305
2306   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2307   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2308       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2309     return DAG.getNode(ISD::SUB, DL, VT, N0,
2310                        DAG.getNode(ISD::SHL, DL, VT,
2311                                    N1.getOperand(0).getOperand(1),
2312                                    N1.getOperand(1)));
2313
2314   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2315     return V;
2316
2317   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2318   // rather than 'add 0/-1' (the zext should get folded).
2319   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2320   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2321       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2322       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2323     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2324     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2325   }
2326
2327   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2328   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2329     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2330     if (TN->getVT() == MVT::i1) {
2331       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2332                                  DAG.getConstant(1, DL, VT));
2333       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2334     }
2335   }
2336
2337   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2338   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2339       N1.getResNo() == 0)
2340     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2341                        N0, N1.getOperand(0), N1.getOperand(2));
2342
2343   // (add X, Carry) -> (addcarry X, 0, Carry)
2344   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2345     if (SDValue Carry = getAsCarry(TLI, N1))
2346       return DAG.getNode(ISD::ADDCARRY, DL,
2347                          DAG.getVTList(VT, Carry.getValueType()), N0,
2348                          DAG.getConstant(0, DL, VT), Carry);
2349
2350   return SDValue();
2351 }
2352
2353 SDValue DAGCombiner::visitADDC(SDNode *N) {
2354   SDValue N0 = N->getOperand(0);
2355   SDValue N1 = N->getOperand(1);
2356   EVT VT = N0.getValueType();
2357   SDLoc DL(N);
2358
2359   // If the flag result is dead, turn this into an ADD.
2360   if (!N->hasAnyUseOfValue(1))
2361     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2362                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2363
2364   // canonicalize constant to RHS.
2365   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2366   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2367   if (N0C && !N1C)
2368     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2369
2370   // fold (addc x, 0) -> x + no carry out
2371   if (isNullConstant(N1))
2372     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2373                                         DL, MVT::Glue));
2374
2375   // If it cannot overflow, transform into an add.
2376   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2377     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2378                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2379
2380   return SDValue();
2381 }
2382
2383 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2384                            SelectionDAG &DAG, const TargetLowering &TLI) {
2385   SDValue Cst;
2386   switch (TLI.getBooleanContents(VT)) {
2387   case TargetLowering::ZeroOrOneBooleanContent:
2388   case TargetLowering::UndefinedBooleanContent:
2389     Cst = DAG.getConstant(1, DL, VT);
2390     break;
2391   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2392     Cst = DAG.getConstant(-1, DL, VT);
2393     break;
2394   }
2395
2396   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2397 }
2398
2399 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2400   if (V.getOpcode() != ISD::XOR) return false;
2401   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2402   if (!Const) return false;
2403
2404   switch(TLI.getBooleanContents(VT)) {
2405     case TargetLowering::ZeroOrOneBooleanContent:
2406       return Const->isOne();
2407     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2408       return Const->isAllOnesValue();
2409     case TargetLowering::UndefinedBooleanContent:
2410       return (Const->getAPIntValue() & 0x01) == 1;
2411   }
2412   llvm_unreachable("Unsupported boolean content");
2413 }
2414
2415 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2416   SDValue N0 = N->getOperand(0);
2417   SDValue N1 = N->getOperand(1);
2418   EVT VT = N0.getValueType();
2419   if (VT.isVector())
2420     return SDValue();
2421
2422   EVT CarryVT = N->getValueType(1);
2423   SDLoc DL(N);
2424
2425   // If the flag result is dead, turn this into an ADD.
2426   if (!N->hasAnyUseOfValue(1))
2427     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2428                      DAG.getUNDEF(CarryVT));
2429
2430   // canonicalize constant to RHS.
2431   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2432   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2433   if (N0C && !N1C)
2434     return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2435
2436   // fold (uaddo x, 0) -> x + no carry out
2437   if (isNullConstant(N1))
2438     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2439
2440   // If it cannot overflow, transform into an add.
2441   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2442     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2443                      DAG.getConstant(0, DL, CarryVT));
2444
2445   // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2446   if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2447     SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2448                               DAG.getConstant(0, DL, VT),
2449                               N0.getOperand(0));
2450     return CombineTo(N, Sub,
2451                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2452   }
2453
2454   if (SDValue Combined = visitUADDOLike(N0, N1, N))
2455     return Combined;
2456
2457   if (SDValue Combined = visitUADDOLike(N1, N0, N))
2458     return Combined;
2459
2460   return SDValue();
2461 }
2462
2463 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2464   auto VT = N0.getValueType();
2465
2466   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2467   // If Y + 1 cannot overflow.
2468   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2469     SDValue Y = N1.getOperand(0);
2470     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2471     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2472       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2473                          N1.getOperand(2));
2474   }
2475
2476   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2477   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2478     if (SDValue Carry = getAsCarry(TLI, N1))
2479       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2480                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2481
2482   return SDValue();
2483 }
2484
2485 SDValue DAGCombiner::visitADDE(SDNode *N) {
2486   SDValue N0 = N->getOperand(0);
2487   SDValue N1 = N->getOperand(1);
2488   SDValue CarryIn = N->getOperand(2);
2489
2490   // canonicalize constant to RHS
2491   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2492   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2493   if (N0C && !N1C)
2494     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2495                        N1, N0, CarryIn);
2496
2497   // fold (adde x, y, false) -> (addc x, y)
2498   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2499     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2500
2501   return SDValue();
2502 }
2503
2504 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2505   SDValue N0 = N->getOperand(0);
2506   SDValue N1 = N->getOperand(1);
2507   SDValue CarryIn = N->getOperand(2);
2508   SDLoc DL(N);
2509
2510   // canonicalize constant to RHS
2511   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2512   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2513   if (N0C && !N1C)
2514     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2515
2516   // fold (addcarry x, y, false) -> (uaddo x, y)
2517   if (isNullConstant(CarryIn)) {
2518     if (!LegalOperations ||
2519         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2520       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2521   }
2522
2523   EVT CarryVT = CarryIn.getValueType();
2524
2525   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2526   if (isNullConstant(N0) && isNullConstant(N1)) {
2527     EVT VT = N0.getValueType();
2528     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2529     AddToWorklist(CarryExt.getNode());
2530     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2531                                     DAG.getConstant(1, DL, VT)),
2532                      DAG.getConstant(0, DL, CarryVT));
2533   }
2534
2535   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2536   if (isBitwiseNot(N0) && isNullConstant(N1) &&
2537       isBooleanFlip(CarryIn, CarryVT, TLI)) {
2538     SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2539                               DAG.getConstant(0, DL, N0.getValueType()),
2540                               N0.getOperand(0), CarryIn.getOperand(0));
2541     return CombineTo(N, Sub,
2542                      flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2543   }
2544
2545   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2546     return Combined;
2547
2548   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2549     return Combined;
2550
2551   return SDValue();
2552 }
2553
2554 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2555                                        SDNode *N) {
2556   // Iff the flag result is dead:
2557   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2558   if ((N0.getOpcode() == ISD::ADD ||
2559        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2560       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2561     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2562                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2563
2564   /**
2565    * When one of the addcarry argument is itself a carry, we may be facing
2566    * a diamond carry propagation. In which case we try to transform the DAG
2567    * to ensure linear carry propagation if that is possible.
2568    *
2569    * We are trying to get:
2570    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2571    */
2572   if (auto Y = getAsCarry(TLI, N1)) {
2573     /**
2574      *            (uaddo A, B)
2575      *             /       \
2576      *          Carry      Sum
2577      *            |          \
2578      *            | (addcarry *, 0, Z)
2579      *            |       /
2580      *             \   Carry
2581      *              |   /
2582      * (addcarry X, *, *)
2583      */
2584     if (Y.getOpcode() == ISD::UADDO &&
2585         CarryIn.getResNo() == 1 &&
2586         CarryIn.getOpcode() == ISD::ADDCARRY &&
2587         isNullConstant(CarryIn.getOperand(1)) &&
2588         CarryIn.getOperand(0) == Y.getValue(0)) {
2589       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2590                               Y.getOperand(0), Y.getOperand(1),
2591                               CarryIn.getOperand(2));
2592       AddToWorklist(NewY.getNode());
2593       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2594                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2595                          NewY.getValue(1));
2596     }
2597   }
2598
2599   return SDValue();
2600 }
2601
2602 // Since it may not be valid to emit a fold to zero for vector initializers
2603 // check if we can before folding.
2604 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2605                              SelectionDAG &DAG, bool LegalOperations) {
2606   if (!VT.isVector())
2607     return DAG.getConstant(0, DL, VT);
2608   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2609     return DAG.getConstant(0, DL, VT);
2610   return SDValue();
2611 }
2612
2613 SDValue DAGCombiner::visitSUB(SDNode *N) {
2614   SDValue N0 = N->getOperand(0);
2615   SDValue N1 = N->getOperand(1);
2616   EVT VT = N0.getValueType();
2617   SDLoc DL(N);
2618
2619   // fold vector ops
2620   if (VT.isVector()) {
2621     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2622       return FoldedVOp;
2623
2624     // fold (sub x, 0) -> x, vector edition
2625     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2626       return N0;
2627   }
2628
2629   // fold (sub x, x) -> 0
2630   // FIXME: Refactor this and xor and other similar operations together.
2631   if (N0 == N1)
2632     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2633   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2634       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2635     // fold (sub c1, c2) -> c1-c2
2636     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2637                                       N1.getNode());
2638   }
2639
2640   if (SDValue NewSel = foldBinOpIntoSelect(N))
2641     return NewSel;
2642
2643   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2644
2645   // fold (sub x, c) -> (add x, -c)
2646   if (N1C) {
2647     return DAG.getNode(ISD::ADD, DL, VT, N0,
2648                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2649   }
2650
2651   if (isNullOrNullSplat(N0)) {
2652     unsigned BitWidth = VT.getScalarSizeInBits();
2653     // Right-shifting everything out but the sign bit followed by negation is
2654     // the same as flipping arithmetic/logical shift type without the negation:
2655     // -(X >>u 31) -> (X >>s 31)
2656     // -(X >>s 31) -> (X >>u 31)
2657     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2658       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2659       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2660         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2661         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2662           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2663       }
2664     }
2665
2666     // 0 - X --> 0 if the sub is NUW.
2667     if (N->getFlags().hasNoUnsignedWrap())
2668       return N0;
2669
2670     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2671       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2672       // N1 must be 0 because negating the minimum signed value is undefined.
2673       if (N->getFlags().hasNoSignedWrap())
2674         return N0;
2675
2676       // 0 - X --> X if X is 0 or the minimum signed value.
2677       return N1;
2678     }
2679   }
2680
2681   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2682   if (isAllOnesOrAllOnesSplat(N0))
2683     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2684
2685   // fold (A - (0-B)) -> A+B
2686   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2687     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2688
2689   // fold A-(A-B) -> B
2690   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2691     return N1.getOperand(1);
2692
2693   // fold (A+B)-A -> B
2694   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2695     return N0.getOperand(1);
2696
2697   // fold (A+B)-B -> A
2698   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2699     return N0.getOperand(0);
2700
2701   // fold C2-(A+C1) -> (C2-C1)-A
2702   if (N1.getOpcode() == ISD::ADD) {
2703     SDValue N11 = N1.getOperand(1);
2704     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2705         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2706       SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2707       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2708     }
2709   }
2710
2711   // fold ((A+(B+or-C))-B) -> A+or-C
2712   if (N0.getOpcode() == ISD::ADD &&
2713       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2714        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2715       N0.getOperand(1).getOperand(0) == N1)
2716     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2717                        N0.getOperand(1).getOperand(1));
2718
2719   // fold ((A+(C+B))-B) -> A+C
2720   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2721       N0.getOperand(1).getOperand(1) == N1)
2722     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2723                        N0.getOperand(1).getOperand(0));
2724
2725   // fold ((A-(B-C))-C) -> A-B
2726   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2727       N0.getOperand(1).getOperand(1) == N1)
2728     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2729                        N0.getOperand(1).getOperand(0));
2730
2731   // fold (A-(B-C)) -> A+(C-B)
2732   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2733     return DAG.getNode(ISD::ADD, DL, VT, N0,
2734                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2735                                    N1.getOperand(0)));
2736
2737   // fold (X - (-Y * Z)) -> (X + (Y * Z))
2738   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2739     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2740         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2741       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2742                                 N1.getOperand(0).getOperand(1),
2743                                 N1.getOperand(1));
2744       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2745     }
2746     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2747         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2748       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2749                                 N1.getOperand(0),
2750                                 N1.getOperand(1).getOperand(1));
2751       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2752     }
2753   }
2754
2755   // If either operand of a sub is undef, the result is undef
2756   if (N0.isUndef())
2757     return N0;
2758   if (N1.isUndef())
2759     return N1;
2760
2761   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2762     return V;
2763
2764   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2765     return V;
2766
2767   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2768     return V;
2769
2770   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2771   // rather than 'sub 0/1' (the sext should get folded).
2772   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2773   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2774       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2775       TLI.getBooleanContents(VT) ==
2776           TargetLowering::ZeroOrNegativeOneBooleanContent) {
2777     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2778     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2779   }
2780
2781   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2782   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2783     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2784       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2785       SDValue S0 = N1.getOperand(0);
2786       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2787         unsigned OpSizeInBits = VT.getScalarSizeInBits();
2788         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2789           if (C->getAPIntValue() == (OpSizeInBits - 1))
2790             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2791       }
2792     }
2793   }
2794
2795   // If the relocation model supports it, consider symbol offsets.
2796   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2797     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2798       // fold (sub Sym, c) -> Sym-c
2799       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2800         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2801                                     GA->getOffset() -
2802                                         (uint64_t)N1C->getSExtValue());
2803       // fold (sub Sym+c1, Sym+c2) -> c1-c2
2804       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2805         if (GA->getGlobal() == GB->getGlobal())
2806           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2807                                  DL, VT);
2808     }
2809
2810   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2811   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2812     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2813     if (TN->getVT() == MVT::i1) {
2814       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2815                                  DAG.getConstant(1, DL, VT));
2816       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2817     }
2818   }
2819
2820   // Prefer an add for more folding potential and possibly better codegen:
2821   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2822   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2823     SDValue ShAmt = N1.getOperand(1);
2824     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2825     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2826       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2827       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2828     }
2829   }
2830
2831   return SDValue();
2832 }
2833
2834 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2835   SDValue N0 = N->getOperand(0);
2836   SDValue N1 = N->getOperand(1);
2837   EVT VT = N0.getValueType();
2838   SDLoc DL(N);
2839
2840   // fold vector ops
2841   if (VT.isVector()) {
2842     // TODO SimplifyVBinOp
2843
2844     // fold (sub_sat x, 0) -> x, vector edition
2845     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2846       return N0;
2847   }
2848
2849   // fold (sub_sat x, undef) -> 0
2850   if (N0.isUndef() || N1.isUndef())
2851     return DAG.getConstant(0, DL, VT);
2852
2853   // fold (sub_sat x, x) -> 0
2854   if (N0 == N1)
2855     return DAG.getConstant(0, DL, VT);
2856
2857   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2858       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2859     // fold (sub_sat c1, c2) -> c3
2860     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2861                                       N1.getNode());
2862   }
2863
2864   // fold (sub_sat x, 0) -> x
2865   if (isNullConstant(N1))
2866     return N0;
2867
2868   return SDValue();
2869 }
2870
2871 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2872   SDValue N0 = N->getOperand(0);
2873   SDValue N1 = N->getOperand(1);
2874   EVT VT = N0.getValueType();
2875   SDLoc DL(N);
2876
2877   // If the flag result is dead, turn this into an SUB.
2878   if (!N->hasAnyUseOfValue(1))
2879     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2880                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2881
2882   // fold (subc x, x) -> 0 + no borrow
2883   if (N0 == N1)
2884     return CombineTo(N, DAG.getConstant(0, DL, VT),
2885                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2886
2887   // fold (subc x, 0) -> x + no borrow
2888   if (isNullConstant(N1))
2889     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2890
2891   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2892   if (isAllOnesConstant(N0))
2893     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2894                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2895
2896   return SDValue();
2897 }
2898
2899 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2900   SDValue N0 = N->getOperand(0);
2901   SDValue N1 = N->getOperand(1);
2902   EVT VT = N0.getValueType();
2903   if (VT.isVector())
2904     return SDValue();
2905
2906   EVT CarryVT = N->getValueType(1);
2907   SDLoc DL(N);
2908
2909   // If the flag result is dead, turn this into an SUB.
2910   if (!N->hasAnyUseOfValue(1))
2911     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2912                      DAG.getUNDEF(CarryVT));
2913
2914   // fold (usubo x, x) -> 0 + no borrow
2915   if (N0 == N1)
2916     return CombineTo(N, DAG.getConstant(0, DL, VT),
2917                      DAG.getConstant(0, DL, CarryVT));
2918
2919   // fold (usubo x, 0) -> x + no borrow
2920   if (isNullConstant(N1))
2921     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2922
2923   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2924   if (isAllOnesConstant(N0))
2925     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2926                      DAG.getConstant(0, DL, CarryVT));
2927
2928   return SDValue();
2929 }
2930
2931 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2932   SDValue N0 = N->getOperand(0);
2933   SDValue N1 = N->getOperand(1);
2934   SDValue CarryIn = N->getOperand(2);
2935
2936   // fold (sube x, y, false) -> (subc x, y)
2937   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2938     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2939
2940   return SDValue();
2941 }
2942
2943 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2944   SDValue N0 = N->getOperand(0);
2945   SDValue N1 = N->getOperand(1);
2946   SDValue CarryIn = N->getOperand(2);
2947
2948   // fold (subcarry x, y, false) -> (usubo x, y)
2949   if (isNullConstant(CarryIn)) {
2950     if (!LegalOperations ||
2951         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2952       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2953   }
2954
2955   return SDValue();
2956 }
2957
2958 SDValue DAGCombiner::visitMUL(SDNode *N) {
2959   SDValue N0 = N->getOperand(0);
2960   SDValue N1 = N->getOperand(1);
2961   EVT VT = N0.getValueType();
2962
2963   // fold (mul x, undef) -> 0
2964   if (N0.isUndef() || N1.isUndef())
2965     return DAG.getConstant(0, SDLoc(N), VT);
2966
2967   bool N0IsConst = false;
2968   bool N1IsConst = false;
2969   bool N1IsOpaqueConst = false;
2970   bool N0IsOpaqueConst = false;
2971   APInt ConstValue0, ConstValue1;
2972   // fold vector ops
2973   if (VT.isVector()) {
2974     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2975       return FoldedVOp;
2976
2977     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2978     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2979     assert((!N0IsConst ||
2980             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2981            "Splat APInt should be element width");
2982     assert((!N1IsConst ||
2983             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2984            "Splat APInt should be element width");
2985   } else {
2986     N0IsConst = isa<ConstantSDNode>(N0);
2987     if (N0IsConst) {
2988       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2989       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2990     }
2991     N1IsConst = isa<ConstantSDNode>(N1);
2992     if (N1IsConst) {
2993       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2994       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2995     }
2996   }
2997
2998   // fold (mul c1, c2) -> c1*c2
2999   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3000     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3001                                       N0.getNode(), N1.getNode());
3002
3003   // canonicalize constant to RHS (vector doesn't have to splat)
3004   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3005      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3006     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3007   // fold (mul x, 0) -> 0
3008   if (N1IsConst && ConstValue1.isNullValue())
3009     return N1;
3010   // fold (mul x, 1) -> x
3011   if (N1IsConst && ConstValue1.isOneValue())
3012     return N0;
3013
3014   if (SDValue NewSel = foldBinOpIntoSelect(N))
3015     return NewSel;
3016
3017   // fold (mul x, -1) -> 0-x
3018   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3019     SDLoc DL(N);
3020     return DAG.getNode(ISD::SUB, DL, VT,
3021                        DAG.getConstant(0, DL, VT), N0);
3022   }
3023   // fold (mul x, (1 << c)) -> x << c
3024   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3025       DAG.isKnownToBeAPowerOfTwo(N1) &&
3026       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3027     SDLoc DL(N);
3028     SDValue LogBase2 = BuildLogBase2(N1, DL);
3029     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3030     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3031     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3032   }
3033   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3034   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3035     unsigned Log2Val = (-ConstValue1).logBase2();
3036     SDLoc DL(N);
3037     // FIXME: If the input is something that is easily negated (e.g. a
3038     // single-use add), we should put the negate there.
3039     return DAG.getNode(ISD::SUB, DL, VT,
3040                        DAG.getConstant(0, DL, VT),
3041                        DAG.getNode(ISD::SHL, DL, VT, N0,
3042                             DAG.getConstant(Log2Val, DL,
3043                                       getShiftAmountTy(N0.getValueType()))));
3044   }
3045
3046   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3047   // mul x, (2^N + 1) --> add (shl x, N), x
3048   // mul x, (2^N - 1) --> sub (shl x, N), x
3049   // Examples: x * 33 --> (x << 5) + x
3050   //           x * 15 --> (x << 4) - x
3051   //           x * -33 --> -((x << 5) + x)
3052   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3053   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3054     // TODO: We could handle more general decomposition of any constant by
3055     //       having the target set a limit on number of ops and making a
3056     //       callback to determine that sequence (similar to sqrt expansion).
3057     unsigned MathOp = ISD::DELETED_NODE;
3058     APInt MulC = ConstValue1.abs();
3059     if ((MulC - 1).isPowerOf2())
3060       MathOp = ISD::ADD;
3061     else if ((MulC + 1).isPowerOf2())
3062       MathOp = ISD::SUB;
3063
3064     if (MathOp != ISD::DELETED_NODE) {
3065       unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3066                                           : (MulC + 1).logBase2();
3067       assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3068              "Not expecting multiply-by-constant that could have simplified");
3069       SDLoc DL(N);
3070       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3071                                 DAG.getConstant(ShAmt, DL, VT));
3072       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3073       if (ConstValue1.isNegative())
3074         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3075       return R;
3076     }
3077   }
3078
3079   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3080   if (N0.getOpcode() == ISD::SHL &&
3081       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3082       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3083     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3084     if (isConstantOrConstantVector(C3))
3085       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3086   }
3087
3088   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3089   // use.
3090   {
3091     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3092
3093     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3094     if (N0.getOpcode() == ISD::SHL &&
3095         isConstantOrConstantVector(N0.getOperand(1)) &&
3096         N0.getNode()->hasOneUse()) {
3097       Sh = N0; Y = N1;
3098     } else if (N1.getOpcode() == ISD::SHL &&
3099                isConstantOrConstantVector(N1.getOperand(1)) &&
3100                N1.getNode()->hasOneUse()) {
3101       Sh = N1; Y = N0;
3102     }
3103
3104     if (Sh.getNode()) {
3105       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3106       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3107     }
3108   }
3109
3110   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3111   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3112       N0.getOpcode() == ISD::ADD &&
3113       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3114       isMulAddWithConstProfitable(N, N0, N1))
3115       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3116                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3117                                      N0.getOperand(0), N1),
3118                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3119                                      N0.getOperand(1), N1));
3120
3121   // reassociate mul
3122   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3123     return RMUL;
3124
3125   return SDValue();
3126 }
3127
3128 /// Return true if divmod libcall is available.
3129 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3130                                      const TargetLowering &TLI) {
3131   RTLIB::Libcall LC;
3132   EVT NodeType = Node->getValueType(0);
3133   if (!NodeType.isSimple())
3134     return false;
3135   switch (NodeType.getSimpleVT().SimpleTy) {
3136   default: return false; // No libcall for vector types.
3137   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3138   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3139   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3140   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3141   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3142   }
3143
3144   return TLI.getLibcallName(LC) != nullptr;
3145 }
3146
3147 /// Issue divrem if both quotient and remainder are needed.
3148 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3149   if (Node->use_empty())
3150     return SDValue(); // This is a dead node, leave it alone.
3151
3152   unsigned Opcode = Node->getOpcode();
3153   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3154   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3155
3156   // DivMod lib calls can still work on non-legal types if using lib-calls.
3157   EVT VT = Node->getValueType(0);
3158   if (VT.isVector() || !VT.isInteger())
3159     return SDValue();
3160
3161   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3162     return SDValue();
3163
3164   // If DIVREM is going to get expanded into a libcall,
3165   // but there is no libcall available, then don't combine.
3166   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3167       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3168     return SDValue();
3169
3170   // If div is legal, it's better to do the normal expansion
3171   unsigned OtherOpcode = 0;
3172   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3173     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3174     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3175       return SDValue();
3176   } else {
3177     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3178     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3179       return SDValue();
3180   }
3181
3182   SDValue Op0 = Node->getOperand(0);
3183   SDValue Op1 = Node->getOperand(1);
3184   SDValue combined;
3185   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3186          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3187     SDNode *User = *UI;
3188     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3189         User->use_empty())
3190       continue;
3191     // Convert the other matching node(s), too;
3192     // otherwise, the DIVREM may get target-legalized into something
3193     // target-specific that we won't be able to recognize.
3194     unsigned UserOpc = User->getOpcode();
3195     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3196         User->getOperand(0) == Op0 &&
3197         User->getOperand(1) == Op1) {
3198       if (!combined) {
3199         if (UserOpc == OtherOpcode) {
3200           SDVTList VTs = DAG.getVTList(VT, VT);
3201           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3202         } else if (UserOpc == DivRemOpc) {
3203           combined = SDValue(User, 0);
3204         } else {
3205           assert(UserOpc == Opcode);
3206           continue;
3207         }
3208       }
3209       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3210         CombineTo(User, combined);
3211       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3212         CombineTo(User, combined.getValue(1));
3213     }
3214   }
3215   return combined;
3216 }
3217
3218 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3219   SDValue N0 = N->getOperand(0);
3220   SDValue N1 = N->getOperand(1);
3221   EVT VT = N->getValueType(0);
3222   SDLoc DL(N);
3223
3224   unsigned Opc = N->getOpcode();
3225   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3226   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3227
3228   // X / undef -> undef
3229   // X % undef -> undef
3230   // X / 0 -> undef
3231   // X % 0 -> undef
3232   // NOTE: This includes vectors where any divisor element is zero/undef.
3233   if (DAG.isUndef(Opc, {N0, N1}))
3234     return DAG.getUNDEF(VT);
3235
3236   // undef / X -> 0
3237   // undef % X -> 0
3238   if (N0.isUndef())
3239     return DAG.getConstant(0, DL, VT);
3240
3241   // 0 / X -> 0
3242   // 0 % X -> 0
3243   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3244   if (N0C && N0C->isNullValue())
3245     return N0;
3246
3247   // X / X -> 1
3248   // X % X -> 0
3249   if (N0 == N1)
3250     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3251
3252   // X / 1 -> X
3253   // X % 1 -> 0
3254   // If this is a boolean op (single-bit element type), we can't have
3255   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3256   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3257   // it's a 1.
3258   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3259     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3260
3261   return SDValue();
3262 }
3263
3264 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3265   SDValue N0 = N->getOperand(0);
3266   SDValue N1 = N->getOperand(1);
3267   EVT VT = N->getValueType(0);
3268   EVT CCVT = getSetCCResultType(VT);
3269
3270   // fold vector ops
3271   if (VT.isVector())
3272     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3273       return FoldedVOp;
3274
3275   SDLoc DL(N);
3276
3277   // fold (sdiv c1, c2) -> c1/c2
3278   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3279   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3280   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3281     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3282   // fold (sdiv X, -1) -> 0-X
3283   if (N1C && N1C->isAllOnesValue())
3284     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3285   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3286   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3287     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3288                          DAG.getConstant(1, DL, VT),
3289                          DAG.getConstant(0, DL, VT));
3290
3291   if (SDValue V = simplifyDivRem(N, DAG))
3292     return V;
3293
3294   if (SDValue NewSel = foldBinOpIntoSelect(N))
3295     return NewSel;
3296
3297   // If we know the sign bits of both operands are zero, strength reduce to a
3298   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3299   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3300     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3301
3302   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3303     // If the corresponding remainder node exists, update its users with
3304     // (Dividend - (Quotient * Divisor).
3305     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3306                                               { N0, N1 })) {
3307       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3308       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3309       AddToWorklist(Mul.getNode());
3310       AddToWorklist(Sub.getNode());
3311       CombineTo(RemNode, Sub);
3312     }
3313     return V;
3314   }
3315
3316   // sdiv, srem -> sdivrem
3317   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3318   // true.  Otherwise, we break the simplification logic in visitREM().
3319   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3320   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3321     if (SDValue DivRem = useDivRem(N))
3322         return DivRem;
3323
3324   return SDValue();
3325 }
3326
3327 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3328   SDLoc DL(N);
3329   EVT VT = N->getValueType(0);
3330   EVT CCVT = getSetCCResultType(VT);
3331   unsigned BitWidth = VT.getScalarSizeInBits();
3332
3333   // Helper for determining whether a value is a power-2 constant scalar or a
3334   // vector of such elements.
3335   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3336     if (C->isNullValue() || C->isOpaque())
3337       return false;
3338     if (C->getAPIntValue().isPowerOf2())
3339       return true;
3340     if ((-C->getAPIntValue()).isPowerOf2())
3341       return true;
3342     return false;
3343   };
3344
3345   // fold (sdiv X, pow2) -> simple ops after legalize
3346   // FIXME: We check for the exact bit here because the generic lowering gives
3347   // better results in that case. The target-specific lowering should learn how
3348   // to handle exact sdivs efficiently.
3349   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3350     // Target-specific implementation of sdiv x, pow2.
3351     if (SDValue Res = BuildSDIVPow2(N))
3352       return Res;
3353
3354     // Create constants that are functions of the shift amount value.
3355     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3356     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3357     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3358     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3359     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3360     if (!isConstantOrConstantVector(Inexact))
3361       return SDValue();
3362
3363     // Splat the sign bit into the register
3364     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3365                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3366     AddToWorklist(Sign.getNode());
3367
3368     // Add (N0 < 0) ? abs2 - 1 : 0;
3369     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3370     AddToWorklist(Srl.getNode());
3371     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3372     AddToWorklist(Add.getNode());
3373     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3374     AddToWorklist(Sra.getNode());
3375
3376     // Special case: (sdiv X, 1) -> X
3377     // Special Case: (sdiv X, -1) -> 0-X
3378     SDValue One = DAG.getConstant(1, DL, VT);
3379     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3380     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3381     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3382     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3383     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3384
3385     // If dividing by a positive value, we're done. Otherwise, the result must
3386     // be negated.
3387     SDValue Zero = DAG.getConstant(0, DL, VT);
3388     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3389
3390     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3391     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3392     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3393     return Res;
3394   }
3395
3396   // If integer divide is expensive and we satisfy the requirements, emit an
3397   // alternate sequence.  Targets may check function attributes for size/speed
3398   // trade-offs.
3399   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3400   if (isConstantOrConstantVector(N1) &&
3401       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3402     if (SDValue Op = BuildSDIV(N))
3403       return Op;
3404
3405   return SDValue();
3406 }
3407
3408 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3409   SDValue N0 = N->getOperand(0);
3410   SDValue N1 = N->getOperand(1);
3411   EVT VT = N->getValueType(0);
3412   EVT CCVT = getSetCCResultType(VT);
3413
3414   // fold vector ops
3415   if (VT.isVector())
3416     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3417       return FoldedVOp;
3418
3419   SDLoc DL(N);
3420
3421   // fold (udiv c1, c2) -> c1/c2
3422   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3423   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3424   if (N0C && N1C)
3425     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3426                                                     N0C, N1C))
3427       return Folded;
3428   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3429   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3430     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3431                          DAG.getConstant(1, DL, VT),
3432                          DAG.getConstant(0, DL, VT));
3433
3434   if (SDValue V = simplifyDivRem(N, DAG))
3435     return V;
3436
3437   if (SDValue NewSel = foldBinOpIntoSelect(N))
3438     return NewSel;
3439
3440   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3441     // If the corresponding remainder node exists, update its users with
3442     // (Dividend - (Quotient * Divisor).
3443     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3444                                               { N0, N1 })) {
3445       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3446       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3447       AddToWorklist(Mul.getNode());
3448       AddToWorklist(Sub.getNode());
3449       CombineTo(RemNode, Sub);
3450     }
3451     return V;
3452   }
3453
3454   // sdiv, srem -> sdivrem
3455   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3456   // true.  Otherwise, we break the simplification logic in visitREM().
3457   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3458   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3459     if (SDValue DivRem = useDivRem(N))
3460         return DivRem;
3461
3462   return SDValue();
3463 }
3464
3465 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3466   SDLoc DL(N);
3467   EVT VT = N->getValueType(0);
3468
3469   // fold (udiv x, (1 << c)) -> x >>u c
3470   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3471       DAG.isKnownToBeAPowerOfTwo(N1)) {
3472     SDValue LogBase2 = BuildLogBase2(N1, DL);
3473     AddToWorklist(LogBase2.getNode());
3474
3475     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3476     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3477     AddToWorklist(Trunc.getNode());
3478     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3479   }
3480
3481   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3482   if (N1.getOpcode() == ISD::SHL) {
3483     SDValue N10 = N1.getOperand(0);
3484     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3485         DAG.isKnownToBeAPowerOfTwo(N10)) {
3486       SDValue LogBase2 = BuildLogBase2(N10, DL);
3487       AddToWorklist(LogBase2.getNode());
3488
3489       EVT ADDVT = N1.getOperand(1).getValueType();
3490       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3491       AddToWorklist(Trunc.getNode());
3492       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3493       AddToWorklist(Add.getNode());
3494       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3495     }
3496   }
3497
3498   // fold (udiv x, c) -> alternate
3499   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3500   if (isConstantOrConstantVector(N1) &&
3501       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3502     if (SDValue Op = BuildUDIV(N))
3503       return Op;
3504
3505   return SDValue();
3506 }
3507
3508 // handles ISD::SREM and ISD::UREM
3509 SDValue DAGCombiner::visitREM(SDNode *N) {
3510   unsigned Opcode = N->getOpcode();
3511   SDValue N0 = N->getOperand(0);
3512   SDValue N1 = N->getOperand(1);
3513   EVT VT = N->getValueType(0);
3514   EVT CCVT = getSetCCResultType(VT);
3515
3516   bool isSigned = (Opcode == ISD::SREM);
3517   SDLoc DL(N);
3518
3519   // fold (rem c1, c2) -> c1%c2
3520   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3521   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3522   if (N0C && N1C)
3523     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3524       return Folded;
3525   // fold (urem X, -1) -> select(X == -1, 0, x)
3526   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3527     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3528                          DAG.getConstant(0, DL, VT), N0);
3529
3530   if (SDValue V = simplifyDivRem(N, DAG))
3531     return V;
3532
3533   if (SDValue NewSel = foldBinOpIntoSelect(N))
3534     return NewSel;
3535
3536   if (isSigned) {
3537     // If we know the sign bits of both operands are zero, strength reduce to a
3538     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3539     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3540       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3541   } else {
3542     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3543     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3544       // fold (urem x, pow2) -> (and x, pow2-1)
3545       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3546       AddToWorklist(Add.getNode());
3547       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3548     }
3549     if (N1.getOpcode() == ISD::SHL &&
3550         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3551       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3552       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3553       AddToWorklist(Add.getNode());
3554       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3555     }
3556   }
3557
3558   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3559
3560   // If X/C can be simplified by the division-by-constant logic, lower
3561   // X%C to the equivalent of X-X/C*C.
3562   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3563   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3564   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3565   // combine will not return a DIVREM.  Regardless, checking cheapness here
3566   // makes sense since the simplification results in fatter code.
3567   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3568     SDValue OptimizedDiv =
3569         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3570     if (OptimizedDiv.getNode()) {
3571       // If the equivalent Div node also exists, update its users.
3572       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3573       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3574                                                 { N0, N1 }))
3575         CombineTo(DivNode, OptimizedDiv);
3576       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3577       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3578       AddToWorklist(OptimizedDiv.getNode());
3579       AddToWorklist(Mul.getNode());
3580       return Sub;
3581     }
3582   }
3583
3584   // sdiv, srem -> sdivrem
3585   if (SDValue DivRem = useDivRem(N))
3586     return DivRem.getValue(1);
3587
3588   return SDValue();
3589 }
3590
3591 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3592   SDValue N0 = N->getOperand(0);
3593   SDValue N1 = N->getOperand(1);
3594   EVT VT = N->getValueType(0);
3595   SDLoc DL(N);
3596
3597   if (VT.isVector()) {
3598     // fold (mulhs x, 0) -> 0
3599     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3600       return N1;
3601     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3602       return N0;
3603   }
3604
3605   // fold (mulhs x, 0) -> 0
3606   if (isNullConstant(N1))
3607     return N1;
3608   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3609   if (isOneConstant(N1))
3610     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3611                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3612                                        getShiftAmountTy(N0.getValueType())));
3613
3614   // fold (mulhs x, undef) -> 0
3615   if (N0.isUndef() || N1.isUndef())
3616     return DAG.getConstant(0, DL, VT);
3617
3618   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3619   // plus a shift.
3620   if (VT.isSimple() && !VT.isVector()) {
3621     MVT Simple = VT.getSimpleVT();
3622     unsigned SimpleSize = Simple.getSizeInBits();
3623     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3624     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3625       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3626       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3627       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3628       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3629             DAG.getConstant(SimpleSize, DL,
3630                             getShiftAmountTy(N1.getValueType())));
3631       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3632     }
3633   }
3634
3635   return SDValue();
3636 }
3637
3638 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3639   SDValue N0 = N->getOperand(0);
3640   SDValue N1 = N->getOperand(1);
3641   EVT VT = N->getValueType(0);
3642   SDLoc DL(N);
3643
3644   if (VT.isVector()) {
3645     // fold (mulhu x, 0) -> 0
3646     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3647       return N1;
3648     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3649       return N0;
3650   }
3651
3652   // fold (mulhu x, 0) -> 0
3653   if (isNullConstant(N1))
3654     return N1;
3655   // fold (mulhu x, 1) -> 0
3656   if (isOneConstant(N1))
3657     return DAG.getConstant(0, DL, N0.getValueType());
3658   // fold (mulhu x, undef) -> 0
3659   if (N0.isUndef() || N1.isUndef())
3660     return DAG.getConstant(0, DL, VT);
3661
3662   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3663   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3664       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3665     SDLoc DL(N);
3666     unsigned NumEltBits = VT.getScalarSizeInBits();
3667     SDValue LogBase2 = BuildLogBase2(N1, DL);
3668     SDValue SRLAmt = DAG.getNode(
3669         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3670     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3671     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3672     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3673   }
3674
3675   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3676   // plus a shift.
3677   if (VT.isSimple() && !VT.isVector()) {
3678     MVT Simple = VT.getSimpleVT();
3679     unsigned SimpleSize = Simple.getSizeInBits();
3680     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3681     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3682       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3683       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3684       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3685       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3686             DAG.getConstant(SimpleSize, DL,
3687                             getShiftAmountTy(N1.getValueType())));
3688       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3689     }
3690   }
3691
3692   return SDValue();
3693 }
3694
3695 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3696 /// give the opcodes for the two computations that are being performed. Return
3697 /// true if a simplification was made.
3698 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3699                                                 unsigned HiOp) {
3700   // If the high half is not needed, just compute the low half.
3701   bool HiExists = N->hasAnyUseOfValue(1);
3702   if (!HiExists && (!LegalOperations ||
3703                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3704     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3705     return CombineTo(N, Res, Res);
3706   }
3707
3708   // If the low half is not needed, just compute the high half.
3709   bool LoExists = N->hasAnyUseOfValue(0);
3710   if (!LoExists && (!LegalOperations ||
3711                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3712     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3713     return CombineTo(N, Res, Res);
3714   }
3715
3716   // If both halves are used, return as it is.
3717   if (LoExists && HiExists)
3718     return SDValue();
3719
3720   // If the two computed results can be simplified separately, separate them.
3721   if (LoExists) {
3722     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3723     AddToWorklist(Lo.getNode());
3724     SDValue LoOpt = combine(Lo.getNode());
3725     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3726         (!LegalOperations ||
3727          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3728       return CombineTo(N, LoOpt, LoOpt);
3729   }
3730
3731   if (HiExists) {
3732     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3733     AddToWorklist(Hi.getNode());
3734     SDValue HiOpt = combine(Hi.getNode());
3735     if (HiOpt.getNode() && HiOpt != Hi &&
3736         (!LegalOperations ||
3737          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3738       return CombineTo(N, HiOpt, HiOpt);
3739   }
3740
3741   return SDValue();
3742 }
3743
3744 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3745   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3746     return Res;
3747
3748   EVT VT = N->getValueType(0);
3749   SDLoc DL(N);
3750
3751   // If the type is twice as wide is legal, transform the mulhu to a wider
3752   // multiply plus a shift.
3753   if (VT.isSimple() && !VT.isVector()) {
3754     MVT Simple = VT.getSimpleVT();
3755     unsigned SimpleSize = Simple.getSizeInBits();
3756     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3757     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3758       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3759       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3760       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3761       // Compute the high part as N1.
3762       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3763             DAG.getConstant(SimpleSize, DL,
3764                             getShiftAmountTy(Lo.getValueType())));
3765       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3766       // Compute the low part as N0.
3767       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3768       return CombineTo(N, Lo, Hi);
3769     }
3770   }
3771
3772   return SDValue();
3773 }
3774
3775 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3776   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3777     return Res;
3778
3779   EVT VT = N->getValueType(0);
3780   SDLoc DL(N);
3781
3782   // If the type is twice as wide is legal, transform the mulhu to a wider
3783   // multiply plus a shift.
3784   if (VT.isSimple() && !VT.isVector()) {
3785     MVT Simple = VT.getSimpleVT();
3786     unsigned SimpleSize = Simple.getSizeInBits();
3787     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3788     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3789       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3790       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3791       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3792       // Compute the high part as N1.
3793       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3794             DAG.getConstant(SimpleSize, DL,
3795                             getShiftAmountTy(Lo.getValueType())));
3796       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3797       // Compute the low part as N0.
3798       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3799       return CombineTo(N, Lo, Hi);
3800     }
3801   }
3802
3803   return SDValue();
3804 }
3805
3806 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3807   // (smulo x, 2) -> (saddo x, x)
3808   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3809     if (C2->getAPIntValue() == 2)
3810       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3811                          N->getOperand(0), N->getOperand(0));
3812
3813   return SDValue();
3814 }
3815
3816 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3817   // (umulo x, 2) -> (uaddo x, x)
3818   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3819     if (C2->getAPIntValue() == 2)
3820       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3821                          N->getOperand(0), N->getOperand(0));
3822
3823   return SDValue();
3824 }
3825
3826 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3827   SDValue N0 = N->getOperand(0);
3828   SDValue N1 = N->getOperand(1);
3829   EVT VT = N0.getValueType();
3830
3831   // fold vector ops
3832   if (VT.isVector())
3833     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3834       return FoldedVOp;
3835
3836   // fold operation with constant operands.
3837   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3838   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3839   if (N0C && N1C)
3840     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3841
3842   // canonicalize constant to RHS
3843   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3844      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3845     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3846
3847   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3848   // Only do this if the current op isn't legal and the flipped is.
3849   unsigned Opcode = N->getOpcode();
3850   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3851   if (!TLI.isOperationLegal(Opcode, VT) &&
3852       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3853       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3854     unsigned AltOpcode;
3855     switch (Opcode) {
3856     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3857     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3858     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3859     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3860     default: llvm_unreachable("Unknown MINMAX opcode");
3861     }
3862     if (TLI.isOperationLegal(AltOpcode, VT))
3863       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3864   }
3865
3866   return SDValue();
3867 }
3868
3869 /// If this is a bitwise logic instruction and both operands have the same
3870 /// opcode, try to sink the other opcode after the logic instruction.
3871 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
3872   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3873   EVT VT = N0.getValueType();
3874   unsigned LogicOpcode = N->getOpcode();
3875   unsigned HandOpcode = N0.getOpcode();
3876   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
3877           LogicOpcode == ISD::XOR) && "Expected logic opcode");
3878   assert(HandOpcode == N1.getOpcode() && "Bad input!");
3879
3880   // Bail early if none of these transforms apply.
3881   if (N0.getNumOperands() == 0)
3882     return SDValue();
3883
3884   // FIXME: We should check number of uses of the operands to not increase
3885   //        the instruction count for all transforms.
3886
3887   // Handle size-changing casts.
3888   SDValue X = N0.getOperand(0);
3889   SDValue Y = N1.getOperand(0);
3890   EVT XVT = X.getValueType();
3891   SDLoc DL(N);
3892   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
3893       HandOpcode == ISD::SIGN_EXTEND) {
3894     // If both operands have other uses, this transform would create extra
3895     // instructions without eliminating anything.
3896     if (!N0.hasOneUse() && !N1.hasOneUse())
3897       return SDValue();
3898     // We need matching integer source types.
3899     if (XVT != Y.getValueType())
3900       return SDValue();
3901     // Don't create an illegal op during or after legalization. Don't ever
3902     // create an unsupported vector op.
3903     if ((VT.isVector() || LegalOperations) &&
3904         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
3905       return SDValue();
3906     // Avoid infinite looping with PromoteIntBinOp.
3907     // TODO: Should we apply desirable/legal constraints to all opcodes?
3908     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
3909         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
3910       return SDValue();
3911     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
3912     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3913     return DAG.getNode(HandOpcode, DL, VT, Logic);
3914   }
3915
3916   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
3917   if (HandOpcode == ISD::TRUNCATE) {
3918     // If both operands have other uses, this transform would create extra
3919     // instructions without eliminating anything.
3920     if (!N0.hasOneUse() && !N1.hasOneUse())
3921       return SDValue();
3922     // We need matching source types.
3923     if (XVT != Y.getValueType())
3924       return SDValue();
3925     // Don't create an illegal op during or after legalization.
3926     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
3927       return SDValue();
3928     // Be extra careful sinking truncate. If it's free, there's no benefit in
3929     // widening a binop. Also, don't create a logic op on an illegal type.
3930     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
3931       return SDValue();
3932     if (!TLI.isTypeLegal(XVT))
3933       return SDValue();
3934     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3935     return DAG.getNode(HandOpcode, DL, VT, Logic);
3936   }
3937
3938   // For binops SHL/SRL/SRA/AND:
3939   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
3940   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
3941        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
3942       N0.getOperand(1) == N1.getOperand(1)) {
3943     // If either operand has other uses, this transform is not an improvement.
3944     if (!N0.hasOneUse() || !N1.hasOneUse())
3945       return SDValue();
3946     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3947     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
3948   }
3949
3950   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
3951   if (HandOpcode == ISD::BSWAP) {
3952     // If either operand has other uses, this transform is not an improvement.
3953     if (!N0.hasOneUse() || !N1.hasOneUse())
3954       return SDValue();
3955     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3956     return DAG.getNode(HandOpcode, DL, VT, Logic);
3957   }
3958
3959   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3960   // Only perform this optimization up until type legalization, before
3961   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3962   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3963   // we don't want to undo this promotion.
3964   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3965   // on scalars.
3966   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
3967        Level <= AfterLegalizeTypes) {
3968     // Input types must be integer and the same.
3969     if (XVT.isInteger() && XVT == Y.getValueType()) {
3970       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
3971       return DAG.getNode(HandOpcode, DL, VT, Logic);
3972     }
3973   }
3974
3975   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3976   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3977   // If both shuffles use the same mask, and both shuffle within a single
3978   // vector, then it is worthwhile to move the swizzle after the operation.
3979   // The type-legalizer generates this pattern when loading illegal
3980   // vector types from memory. In many cases this allows additional shuffle
3981   // optimizations.
3982   // There are other cases where moving the shuffle after the xor/and/or
3983   // is profitable even if shuffles don't perform a swizzle.
3984   // If both shuffles use the same mask, and both shuffles have the same first
3985   // or second operand, then it might still be profitable to move the shuffle
3986   // after the xor/and/or operation.
3987   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3988     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
3989     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
3990     assert(X.getValueType() == Y.getValueType() &&
3991            "Inputs to shuffles are not the same type");
3992
3993     // Check that both shuffles use the same mask. The masks are known to be of
3994     // the same length because the result vector type is the same.
3995     // Check also that shuffles have only one use to avoid introducing extra
3996     // instructions.
3997     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
3998         !SVN0->getMask().equals(SVN1->getMask()))
3999       return SDValue();
4000
4001     // Don't try to fold this node if it requires introducing a
4002     // build vector of all zeros that might be illegal at this stage.
4003     SDValue ShOp = N0.getOperand(1);
4004     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4005       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4006
4007     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4008     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4009       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4010                                   N0.getOperand(0), N1.getOperand(0));
4011       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4012     }
4013
4014     // Don't try to fold this node if it requires introducing a
4015     // build vector of all zeros that might be illegal at this stage.
4016     ShOp = N0.getOperand(0);
4017     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4018       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4019
4020     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4021     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4022       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4023                                   N1.getOperand(1));
4024       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4025     }
4026   }
4027
4028   return SDValue();
4029 }
4030
4031 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4032 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4033                                        const SDLoc &DL) {
4034   SDValue LL, LR, RL, RR, N0CC, N1CC;
4035   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4036       !isSetCCEquivalent(N1, RL, RR, N1CC))
4037     return SDValue();
4038
4039   assert(N0.getValueType() == N1.getValueType() &&
4040          "Unexpected operand types for bitwise logic op");
4041   assert(LL.getValueType() == LR.getValueType() &&
4042          RL.getValueType() == RR.getValueType() &&
4043          "Unexpected operand types for setcc");
4044
4045   // If we're here post-legalization or the logic op type is not i1, the logic
4046   // op type must match a setcc result type. Also, all folds require new
4047   // operations on the left and right operands, so those types must match.
4048   EVT VT = N0.getValueType();
4049   EVT OpVT = LL.getValueType();
4050   if (LegalOperations || VT.getScalarType() != MVT::i1)
4051     if (VT != getSetCCResultType(OpVT))
4052       return SDValue();
4053   if (OpVT != RL.getValueType())
4054     return SDValue();
4055
4056   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4057   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4058   bool IsInteger = OpVT.isInteger();
4059   if (LR == RR && CC0 == CC1 && IsInteger) {
4060     bool IsZero = isNullOrNullSplat(LR);
4061     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4062
4063     // All bits clear?
4064     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4065     // All sign bits clear?
4066     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4067     // Any bits set?
4068     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4069     // Any sign bits set?
4070     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4071
4072     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4073     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4074     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4075     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4076     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4077       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4078       AddToWorklist(Or.getNode());
4079       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4080     }
4081
4082     // All bits set?
4083     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4084     // All sign bits set?
4085     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4086     // Any bits clear?
4087     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4088     // Any sign bits clear?
4089     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4090
4091     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4092     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4093     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4094     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4095     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4096       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4097       AddToWorklist(And.getNode());
4098       return DAG.getSetCC(DL, VT, And, LR, CC1);
4099     }
4100   }
4101
4102   // TODO: What is the 'or' equivalent of this fold?
4103   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4104   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4105       IsInteger && CC0 == ISD::SETNE &&
4106       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4107        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4108     SDValue One = DAG.getConstant(1, DL, OpVT);
4109     SDValue Two = DAG.getConstant(2, DL, OpVT);
4110     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4111     AddToWorklist(Add.getNode());
4112     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4113   }
4114
4115   // Try more general transforms if the predicates match and the only user of
4116   // the compares is the 'and' or 'or'.
4117   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4118       N0.hasOneUse() && N1.hasOneUse()) {
4119     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4120     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4121     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4122       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4123       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4124       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4125       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4126       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4127     }
4128
4129     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4130     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4131       // Match a shared variable operand and 2 non-opaque constant operands.
4132       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4133       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4134       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4135         // Canonicalize larger constant as C0.
4136         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4137           std::swap(C0, C1);
4138
4139         // The difference of the constants must be a single bit.
4140         const APInt &C0Val = C0->getAPIntValue();
4141         const APInt &C1Val = C1->getAPIntValue();
4142         if ((C0Val - C1Val).isPowerOf2()) {
4143           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4144           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4145           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4146           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4147           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4148           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4149           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4150           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4151         }
4152       }
4153     }
4154   }
4155
4156   // Canonicalize equivalent operands to LL == RL.
4157   if (LL == RR && LR == RL) {
4158     CC1 = ISD::getSetCCSwappedOperands(CC1);
4159     std::swap(RL, RR);
4160   }
4161
4162   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4163   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4164   if (LL == RL && LR == RR) {
4165     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4166                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4167     if (NewCC != ISD::SETCC_INVALID &&
4168         (!LegalOperations ||
4169          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4170           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4171       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4172   }
4173
4174   return SDValue();
4175 }
4176
4177 /// This contains all DAGCombine rules which reduce two values combined by
4178 /// an And operation to a single value. This makes them reusable in the context
4179 /// of visitSELECT(). Rules involving constants are not included as
4180 /// visitSELECT() already handles those cases.
4181 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4182   EVT VT = N1.getValueType();
4183   SDLoc DL(N);
4184
4185   // fold (and x, undef) -> 0
4186   if (N0.isUndef() || N1.isUndef())
4187     return DAG.getConstant(0, DL, VT);
4188
4189   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4190     return V;
4191
4192   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4193       VT.getSizeInBits() <= 64) {
4194     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4195       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4196         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4197         // immediate for an add, but it is legal if its top c2 bits are set,
4198         // transform the ADD so the immediate doesn't need to be materialized
4199         // in a register.
4200         APInt ADDC = ADDI->getAPIntValue();
4201         APInt SRLC = SRLI->getAPIntValue();
4202         if (ADDC.getMinSignedBits() <= 64 &&
4203             SRLC.ult(VT.getSizeInBits()) &&
4204             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4205           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4206                                              SRLC.getZExtValue());
4207           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4208             ADDC |= Mask;
4209             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4210               SDLoc DL0(N0);
4211               SDValue NewAdd =
4212                 DAG.getNode(ISD::ADD, DL0, VT,
4213                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4214               CombineTo(N0.getNode(), NewAdd);
4215               // Return N so it doesn't get rechecked!
4216               return SDValue(N, 0);
4217             }
4218           }
4219         }
4220       }
4221     }
4222   }
4223
4224   // Reduce bit extract of low half of an integer to the narrower type.
4225   // (and (srl i64:x, K), KMask) ->
4226   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4227   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4228     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4229       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4230         unsigned Size = VT.getSizeInBits();
4231         const APInt &AndMask = CAnd->getAPIntValue();
4232         unsigned ShiftBits = CShift->getZExtValue();
4233
4234         // Bail out, this node will probably disappear anyway.
4235         if (ShiftBits == 0)
4236           return SDValue();
4237
4238         unsigned MaskBits = AndMask.countTrailingOnes();
4239         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4240
4241         if (AndMask.isMask() &&
4242             // Required bits must not span the two halves of the integer and
4243             // must fit in the half size type.
4244             (ShiftBits + MaskBits <= Size / 2) &&
4245             TLI.isNarrowingProfitable(VT, HalfVT) &&
4246             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4247             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4248             TLI.isTruncateFree(VT, HalfVT) &&
4249             TLI.isZExtFree(HalfVT, VT)) {
4250           // The isNarrowingProfitable is to avoid regressions on PPC and
4251           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4252           // on downstream users of this. Those patterns could probably be
4253           // extended to handle extensions mixed in.
4254
4255           SDValue SL(N0);
4256           assert(MaskBits <= Size);
4257
4258           // Extracting the highest bit of the low half.
4259           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4260           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4261                                       N0.getOperand(0));
4262
4263           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4264           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4265           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4266           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4267           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4268         }
4269       }
4270     }
4271   }
4272
4273   return SDValue();
4274 }
4275
4276 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4277                                    EVT LoadResultTy, EVT &ExtVT) {
4278   if (!AndC->getAPIntValue().isMask())
4279     return false;
4280
4281   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4282
4283   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4284   EVT LoadedVT = LoadN->getMemoryVT();
4285
4286   if (ExtVT == LoadedVT &&
4287       (!LegalOperations ||
4288        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4289     // ZEXTLOAD will match without needing to change the size of the value being
4290     // loaded.
4291     return true;
4292   }
4293
4294   // Do not change the width of a volatile load.
4295   if (LoadN->isVolatile())
4296     return false;
4297
4298   // Do not generate loads of non-round integer types since these can
4299   // be expensive (and would be wrong if the type is not byte sized).
4300   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4301     return false;
4302
4303   if (LegalOperations &&
4304       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4305     return false;
4306
4307   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4308     return false;
4309
4310   return true;
4311 }
4312
4313 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4314                                     ISD::LoadExtType ExtType, EVT &MemVT,
4315                                     unsigned ShAmt) {
4316   if (!LDST)
4317     return false;
4318   // Only allow byte offsets.
4319   if (ShAmt % 8)
4320     return false;
4321
4322   // Do not generate loads of non-round integer types since these can
4323   // be expensive (and would be wrong if the type is not byte sized).
4324   if (!MemVT.isRound())
4325     return false;
4326
4327   // Don't change the width of a volatile load.
4328   if (LDST->isVolatile())
4329     return false;
4330
4331   // Verify that we are actually reducing a load width here.
4332   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4333     return false;
4334
4335   // Ensure that this isn't going to produce an unsupported unaligned access.
4336   if (ShAmt &&
4337       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4338                               LDST->getAddressSpace(), ShAmt / 8))
4339     return false;
4340
4341   // It's not possible to generate a constant of extended or untyped type.
4342   EVT PtrType = LDST->getBasePtr().getValueType();
4343   if (PtrType == MVT::Untyped || PtrType.isExtended())
4344     return false;
4345
4346   if (isa<LoadSDNode>(LDST)) {
4347     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4348     // Don't transform one with multiple uses, this would require adding a new
4349     // load.
4350     if (!SDValue(Load, 0).hasOneUse())
4351       return false;
4352
4353     if (LegalOperations &&
4354         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4355       return false;
4356
4357     // For the transform to be legal, the load must produce only two values
4358     // (the value loaded and the chain).  Don't transform a pre-increment
4359     // load, for example, which produces an extra value.  Otherwise the
4360     // transformation is not equivalent, and the downstream logic to replace
4361     // uses gets things wrong.
4362     if (Load->getNumValues() > 2)
4363       return false;
4364
4365     // If the load that we're shrinking is an extload and we're not just
4366     // discarding the extension we can't simply shrink the load. Bail.
4367     // TODO: It would be possible to merge the extensions in some cases.
4368     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4369         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4370       return false;
4371
4372     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4373       return false;
4374   } else {
4375     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4376     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4377     // Can't write outside the original store
4378     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4379       return false;
4380
4381     if (LegalOperations &&
4382         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4383       return false;
4384   }
4385   return true;
4386 }
4387
4388 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4389                                     SmallVectorImpl<LoadSDNode*> &Loads,
4390                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4391                                     ConstantSDNode *Mask,
4392                                     SDNode *&NodeToMask) {
4393   // Recursively search for the operands, looking for loads which can be
4394   // narrowed.
4395   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4396     SDValue Op = N->getOperand(i);
4397
4398     if (Op.getValueType().isVector())
4399       return false;
4400
4401     // Some constants may need fixing up later if they are too large.
4402     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4403       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4404           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4405         NodesWithConsts.insert(N);
4406       continue;
4407     }
4408
4409     if (!Op.hasOneUse())
4410       return false;
4411
4412     switch(Op.getOpcode()) {
4413     case ISD::LOAD: {
4414       auto *Load = cast<LoadSDNode>(Op);
4415       EVT ExtVT;
4416       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4417           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4418
4419         // ZEXTLOAD is already small enough.
4420         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4421             ExtVT.bitsGE(Load->getMemoryVT()))
4422           continue;
4423
4424         // Use LE to convert equal sized loads to zext.
4425         if (ExtVT.bitsLE(Load->getMemoryVT()))
4426           Loads.push_back(Load);
4427
4428         continue;
4429       }
4430       return false;
4431     }
4432     case ISD::ZERO_EXTEND:
4433     case ISD::AssertZext: {
4434       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4435       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4436       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4437         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4438         Op.getOperand(0).getValueType();
4439
4440       // We can accept extending nodes if the mask is wider or an equal
4441       // width to the original type.
4442       if (ExtVT.bitsGE(VT))
4443         continue;
4444       break;
4445     }
4446     case ISD::OR:
4447     case ISD::XOR:
4448     case ISD::AND:
4449       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4450                              NodeToMask))
4451         return false;
4452       continue;
4453     }
4454
4455     // Allow one node which will masked along with any loads found.
4456     if (NodeToMask)
4457       return false;
4458
4459     // Also ensure that the node to be masked only produces one data result.
4460     NodeToMask = Op.getNode();
4461     if (NodeToMask->getNumValues() > 1) {
4462       bool HasValue = false;
4463       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4464         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4465         if (VT != MVT::Glue && VT != MVT::Other) {
4466           if (HasValue) {
4467             NodeToMask = nullptr;
4468             return false;
4469           }
4470           HasValue = true;
4471         }
4472       }
4473       assert(HasValue && "Node to be masked has no data result?");
4474     }
4475   }
4476   return true;
4477 }
4478
4479 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4480   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4481   if (!Mask)
4482     return false;
4483
4484   if (!Mask->getAPIntValue().isMask())
4485     return false;
4486
4487   // No need to do anything if the and directly uses a load.
4488   if (isa<LoadSDNode>(N->getOperand(0)))
4489     return false;
4490
4491   SmallVector<LoadSDNode*, 8> Loads;
4492   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4493   SDNode *FixupNode = nullptr;
4494   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4495     if (Loads.size() == 0)
4496       return false;
4497
4498     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4499     SDValue MaskOp = N->getOperand(1);
4500
4501     // If it exists, fixup the single node we allow in the tree that needs
4502     // masking.
4503     if (FixupNode) {
4504       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4505       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4506                                 FixupNode->getValueType(0),
4507                                 SDValue(FixupNode, 0), MaskOp);
4508       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4509       if (And.getOpcode() == ISD ::AND)
4510         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4511     }
4512
4513     // Narrow any constants that need it.
4514     for (auto *LogicN : NodesWithConsts) {
4515       SDValue Op0 = LogicN->getOperand(0);
4516       SDValue Op1 = LogicN->getOperand(1);
4517
4518       if (isa<ConstantSDNode>(Op0))
4519           std::swap(Op0, Op1);
4520
4521       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4522                                 Op1, MaskOp);
4523
4524       DAG.UpdateNodeOperands(LogicN, Op0, And);
4525     }
4526
4527     // Create narrow loads.
4528     for (auto *Load : Loads) {
4529       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4530       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4531                                 SDValue(Load, 0), MaskOp);
4532       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4533       if (And.getOpcode() == ISD ::AND)
4534         And = SDValue(
4535             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4536       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4537       assert(NewLoad &&
4538              "Shouldn't be masking the load if it can't be narrowed");
4539       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4540     }
4541     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4542     return true;
4543   }
4544   return false;
4545 }
4546
4547 // Unfold
4548 //    x &  (-1 'logical shift' y)
4549 // To
4550 //    (x 'opposite logical shift' y) 'logical shift' y
4551 // if it is better for performance.
4552 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4553   assert(N->getOpcode() == ISD::AND);
4554
4555   SDValue N0 = N->getOperand(0);
4556   SDValue N1 = N->getOperand(1);
4557
4558   // Do we actually prefer shifts over mask?
4559   if (!TLI.preferShiftsToClearExtremeBits(N0))
4560     return SDValue();
4561
4562   // Try to match  (-1 '[outer] logical shift' y)
4563   unsigned OuterShift;
4564   unsigned InnerShift; // The opposite direction to the OuterShift.
4565   SDValue Y;           // Shift amount.
4566   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4567     if (!M.hasOneUse())
4568       return false;
4569     OuterShift = M->getOpcode();
4570     if (OuterShift == ISD::SHL)
4571       InnerShift = ISD::SRL;
4572     else if (OuterShift == ISD::SRL)
4573       InnerShift = ISD::SHL;
4574     else
4575       return false;
4576     if (!isAllOnesConstant(M->getOperand(0)))
4577       return false;
4578     Y = M->getOperand(1);
4579     return true;
4580   };
4581
4582   SDValue X;
4583   if (matchMask(N1))
4584     X = N0;
4585   else if (matchMask(N0))
4586     X = N1;
4587   else
4588     return SDValue();
4589
4590   SDLoc DL(N);
4591   EVT VT = N->getValueType(0);
4592
4593   //     tmp = x   'opposite logical shift' y
4594   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4595   //     ret = tmp 'logical shift' y
4596   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4597
4598   return T1;
4599 }
4600
4601 SDValue DAGCombiner::visitAND(SDNode *N) {
4602   SDValue N0 = N->getOperand(0);
4603   SDValue N1 = N->getOperand(1);
4604   EVT VT = N1.getValueType();
4605
4606   // x & x --> x
4607   if (N0 == N1)
4608     return N0;
4609
4610   // fold vector ops
4611   if (VT.isVector()) {
4612     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4613       return FoldedVOp;
4614
4615     // fold (and x, 0) -> 0, vector edition
4616     if (ISD::isBuildVectorAllZeros(N0.getNode()))
4617       // do not return N0, because undef node may exist in N0
4618       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4619                              SDLoc(N), N0.getValueType());
4620     if (ISD::isBuildVectorAllZeros(N1.getNode()))
4621       // do not return N1, because undef node may exist in N1
4622       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4623                              SDLoc(N), N1.getValueType());
4624
4625     // fold (and x, -1) -> x, vector edition
4626     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4627       return N1;
4628     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4629       return N0;
4630   }
4631
4632   // fold (and c1, c2) -> c1&c2
4633   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4634   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4635   if (N0C && N1C && !N1C->isOpaque())
4636     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4637   // canonicalize constant to RHS
4638   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4639       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4640     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4641   // fold (and x, -1) -> x
4642   if (isAllOnesConstant(N1))
4643     return N0;
4644   // if (and x, c) is known to be zero, return 0
4645   unsigned BitWidth = VT.getScalarSizeInBits();
4646   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4647                                    APInt::getAllOnesValue(BitWidth)))
4648     return DAG.getConstant(0, SDLoc(N), VT);
4649
4650   if (SDValue NewSel = foldBinOpIntoSelect(N))
4651     return NewSel;
4652
4653   // reassociate and
4654   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4655     return RAND;
4656
4657   // Try to convert a constant mask AND into a shuffle clear mask.
4658   if (VT.isVector())
4659     if (SDValue Shuffle = XformToShuffleWithZero(N))
4660       return Shuffle;
4661
4662   // fold (and (or x, C), D) -> D if (C & D) == D
4663   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4664     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4665   };
4666   if (N0.getOpcode() == ISD::OR &&
4667       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4668     return N1;
4669   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4670   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4671     SDValue N0Op0 = N0.getOperand(0);
4672     APInt Mask = ~N1C->getAPIntValue();
4673     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4674     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4675       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4676                                  N0.getValueType(), N0Op0);
4677
4678       // Replace uses of the AND with uses of the Zero extend node.
4679       CombineTo(N, Zext);
4680
4681       // We actually want to replace all uses of the any_extend with the
4682       // zero_extend, to avoid duplicating things.  This will later cause this
4683       // AND to be folded.
4684       CombineTo(N0.getNode(), Zext);
4685       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4686     }
4687   }
4688   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4689   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4690   // already be zero by virtue of the width of the base type of the load.
4691   //
4692   // the 'X' node here can either be nothing or an extract_vector_elt to catch
4693   // more cases.
4694   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4695        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4696        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4697        N0.getOperand(0).getResNo() == 0) ||
4698       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4699     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4700                                          N0 : N0.getOperand(0) );
4701
4702     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4703     // This can be a pure constant or a vector splat, in which case we treat the
4704     // vector as a scalar and use the splat value.
4705     APInt Constant = APInt::getNullValue(1);
4706     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4707       Constant = C->getAPIntValue();
4708     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4709       APInt SplatValue, SplatUndef;
4710       unsigned SplatBitSize;
4711       bool HasAnyUndefs;
4712       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4713                                              SplatBitSize, HasAnyUndefs);
4714       if (IsSplat) {
4715         // Undef bits can contribute to a possible optimisation if set, so
4716         // set them.
4717         SplatValue |= SplatUndef;
4718
4719         // The splat value may be something like "0x00FFFFFF", which means 0 for
4720         // the first vector value and FF for the rest, repeating. We need a mask
4721         // that will apply equally to all members of the vector, so AND all the
4722         // lanes of the constant together.
4723         EVT VT = Vector->getValueType(0);
4724         unsigned BitWidth = VT.getScalarSizeInBits();
4725
4726         // If the splat value has been compressed to a bitlength lower
4727         // than the size of the vector lane, we need to re-expand it to
4728         // the lane size.
4729         if (BitWidth > SplatBitSize)
4730           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4731                SplatBitSize < BitWidth;
4732                SplatBitSize = SplatBitSize * 2)
4733             SplatValue |= SplatValue.shl(SplatBitSize);
4734
4735         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4736         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4737         if (SplatBitSize % BitWidth == 0) {
4738           Constant = APInt::getAllOnesValue(BitWidth);
4739           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4740             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4741         }
4742       }
4743     }
4744
4745     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4746     // actually legal and isn't going to get expanded, else this is a false
4747     // optimisation.
4748     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4749                                                     Load->getValueType(0),
4750                                                     Load->getMemoryVT());
4751
4752     // Resize the constant to the same size as the original memory access before
4753     // extension. If it is still the AllOnesValue then this AND is completely
4754     // unneeded.
4755     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4756
4757     bool B;
4758     switch (Load->getExtensionType()) {
4759     default: B = false; break;
4760     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4761     case ISD::ZEXTLOAD:
4762     case ISD::NON_EXTLOAD: B = true; break;
4763     }
4764
4765     if (B && Constant.isAllOnesValue()) {
4766       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4767       // preserve semantics once we get rid of the AND.
4768       SDValue NewLoad(Load, 0);
4769
4770       // Fold the AND away. NewLoad may get replaced immediately.
4771       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4772
4773       if (Load->getExtensionType() == ISD::EXTLOAD) {
4774         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4775                               Load->getValueType(0), SDLoc(Load),
4776                               Load->getChain(), Load->getBasePtr(),
4777                               Load->getOffset(), Load->getMemoryVT(),
4778                               Load->getMemOperand());
4779         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4780         if (Load->getNumValues() == 3) {
4781           // PRE/POST_INC loads have 3 values.
4782           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4783                            NewLoad.getValue(2) };
4784           CombineTo(Load, To, 3, true);
4785         } else {
4786           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4787         }
4788       }
4789
4790       return SDValue(N, 0); // Return N so it doesn't get rechecked!
4791     }
4792   }
4793
4794   // fold (and (load x), 255) -> (zextload x, i8)
4795   // fold (and (extload x, i16), 255) -> (zextload x, i8)
4796   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4797   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4798                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
4799                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4800     if (SDValue Res = ReduceLoadWidth(N)) {
4801       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4802         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4803       AddToWorklist(N);
4804       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4805       return SDValue(N, 0);
4806     }
4807   }
4808
4809   if (Level >= AfterLegalizeTypes) {
4810     // Attempt to propagate the AND back up to the leaves which, if they're
4811     // loads, can be combined to narrow loads and the AND node can be removed.
4812     // Perform after legalization so that extend nodes will already be
4813     // combined into the loads.
4814     if (BackwardsPropagateMask(N, DAG)) {
4815       return SDValue(N, 0);
4816     }
4817   }
4818
4819   if (SDValue Combined = visitANDLike(N0, N1, N))
4820     return Combined;
4821
4822   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
4823   if (N0.getOpcode() == N1.getOpcode())
4824     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4825       return V;
4826
4827   // Masking the negated extension of a boolean is just the zero-extended
4828   // boolean:
4829   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4830   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4831   //
4832   // Note: the SimplifyDemandedBits fold below can make an information-losing
4833   // transform, and then we have no way to find this better fold.
4834   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4835     if (isNullOrNullSplat(N0.getOperand(0))) {
4836       SDValue SubRHS = N0.getOperand(1);
4837       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4838           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4839         return SubRHS;
4840       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4841           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4842         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4843     }
4844   }
4845
4846   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4847   // fold (and (sra)) -> (and (srl)) when possible.
4848   if (SimplifyDemandedBits(SDValue(N, 0)))
4849     return SDValue(N, 0);
4850
4851   // fold (zext_inreg (extload x)) -> (zextload x)
4852   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4853     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4854     EVT MemVT = LN0->getMemoryVT();
4855     // If we zero all the possible extended bits, then we can turn this into
4856     // a zextload if we are running before legalize or the operation is legal.
4857     unsigned BitWidth = N1.getScalarValueSizeInBits();
4858     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4859                            BitWidth - MemVT.getScalarSizeInBits())) &&
4860         ((!LegalOperations && !LN0->isVolatile()) ||
4861          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4862       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4863                                        LN0->getChain(), LN0->getBasePtr(),
4864                                        MemVT, LN0->getMemOperand());
4865       AddToWorklist(N);
4866       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4867       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4868     }
4869   }
4870   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4871   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4872       N0.hasOneUse()) {
4873     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4874     EVT MemVT = LN0->getMemoryVT();
4875     // If we zero all the possible extended bits, then we can turn this into
4876     // a zextload if we are running before legalize or the operation is legal.
4877     unsigned BitWidth = N1.getScalarValueSizeInBits();
4878     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4879                            BitWidth - MemVT.getScalarSizeInBits())) &&
4880         ((!LegalOperations && !LN0->isVolatile()) ||
4881          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4882       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4883                                        LN0->getChain(), LN0->getBasePtr(),
4884                                        MemVT, LN0->getMemOperand());
4885       AddToWorklist(N);
4886       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4887       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4888     }
4889   }
4890   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4891   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4892     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4893                                            N0.getOperand(1), false))
4894       return BSwap;
4895   }
4896
4897   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4898     return Shifts;
4899
4900   return SDValue();
4901 }
4902
4903 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4904 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4905                                         bool DemandHighBits) {
4906   if (!LegalOperations)
4907     return SDValue();
4908
4909   EVT VT = N->getValueType(0);
4910   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4911     return SDValue();
4912   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4913     return SDValue();
4914
4915   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4916   bool LookPassAnd0 = false;
4917   bool LookPassAnd1 = false;
4918   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4919       std::swap(N0, N1);
4920   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4921       std::swap(N0, N1);
4922   if (N0.getOpcode() == ISD::AND) {
4923     if (!N0.getNode()->hasOneUse())
4924       return SDValue();
4925     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4926     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4927     // This is needed for X86.
4928     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4929                   N01C->getZExtValue() != 0xFFFF))
4930       return SDValue();
4931     N0 = N0.getOperand(0);
4932     LookPassAnd0 = true;
4933   }
4934
4935   if (N1.getOpcode() == ISD::AND) {
4936     if (!N1.getNode()->hasOneUse())
4937       return SDValue();
4938     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4939     if (!N11C || N11C->getZExtValue() != 0xFF)
4940       return SDValue();
4941     N1 = N1.getOperand(0);
4942     LookPassAnd1 = true;
4943   }
4944
4945   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4946     std::swap(N0, N1);
4947   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4948     return SDValue();
4949   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4950     return SDValue();
4951
4952   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4953   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4954   if (!N01C || !N11C)
4955     return SDValue();
4956   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4957     return SDValue();
4958
4959   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4960   SDValue N00 = N0->getOperand(0);
4961   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4962     if (!N00.getNode()->hasOneUse())
4963       return SDValue();
4964     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4965     if (!N001C || N001C->getZExtValue() != 0xFF)
4966       return SDValue();
4967     N00 = N00.getOperand(0);
4968     LookPassAnd0 = true;
4969   }
4970
4971   SDValue N10 = N1->getOperand(0);
4972   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4973     if (!N10.getNode()->hasOneUse())
4974       return SDValue();
4975     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4976     // Also allow 0xFFFF since the bits will be shifted out. This is needed
4977     // for X86.
4978     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4979                    N101C->getZExtValue() != 0xFFFF))
4980       return SDValue();
4981     N10 = N10.getOperand(0);
4982     LookPassAnd1 = true;
4983   }
4984
4985   if (N00 != N10)
4986     return SDValue();
4987
4988   // Make sure everything beyond the low halfword gets set to zero since the SRL
4989   // 16 will clear the top bits.
4990   unsigned OpSizeInBits = VT.getSizeInBits();
4991   if (DemandHighBits && OpSizeInBits > 16) {
4992     // If the left-shift isn't masked out then the only way this is a bswap is
4993     // if all bits beyond the low 8 are 0. In that case the entire pattern
4994     // reduces to a left shift anyway: leave it for other parts of the combiner.
4995     if (!LookPassAnd0)
4996       return SDValue();
4997
4998     // However, if the right shift isn't masked out then it might be because
4999     // it's not needed. See if we can spot that too.
5000     if (!LookPassAnd1 &&
5001         !DAG.MaskedValueIsZero(
5002             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5003       return SDValue();
5004   }
5005
5006   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5007   if (OpSizeInBits > 16) {
5008     SDLoc DL(N);
5009     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5010                       DAG.getConstant(OpSizeInBits - 16, DL,
5011                                       getShiftAmountTy(VT)));
5012   }
5013   return Res;
5014 }
5015
5016 /// Return true if the specified node is an element that makes up a 32-bit
5017 /// packed halfword byteswap.
5018 /// ((x & 0x000000ff) << 8) |
5019 /// ((x & 0x0000ff00) >> 8) |
5020 /// ((x & 0x00ff0000) << 8) |
5021 /// ((x & 0xff000000) >> 8)
5022 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5023   if (!N.getNode()->hasOneUse())
5024     return false;
5025
5026   unsigned Opc = N.getOpcode();
5027   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5028     return false;
5029
5030   SDValue N0 = N.getOperand(0);
5031   unsigned Opc0 = N0.getOpcode();
5032   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5033     return false;
5034
5035   ConstantSDNode *N1C = nullptr;
5036   // SHL or SRL: look upstream for AND mask operand
5037   if (Opc == ISD::AND)
5038     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5039   else if (Opc0 == ISD::AND)
5040     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5041   if (!N1C)
5042     return false;
5043
5044   unsigned MaskByteOffset;
5045   switch (N1C->getZExtValue()) {
5046   default:
5047     return false;
5048   case 0xFF:       MaskByteOffset = 0; break;
5049   case 0xFF00:     MaskByteOffset = 1; break;
5050   case 0xFFFF:
5051     // In case demanded bits didn't clear the bits that will be shifted out.
5052     // This is needed for X86.
5053     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5054       MaskByteOffset = 1;
5055       break;
5056     }
5057     return false;
5058   case 0xFF0000:   MaskByteOffset = 2; break;
5059   case 0xFF000000: MaskByteOffset = 3; break;
5060   }
5061
5062   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5063   if (Opc == ISD::AND) {
5064     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5065       // (x >> 8) & 0xff
5066       // (x >> 8) & 0xff0000
5067       if (Opc0 != ISD::SRL)
5068         return false;
5069       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5070       if (!C || C->getZExtValue() != 8)
5071         return false;
5072     } else {
5073       // (x << 8) & 0xff00
5074       // (x << 8) & 0xff000000
5075       if (Opc0 != ISD::SHL)
5076         return false;
5077       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5078       if (!C || C->getZExtValue() != 8)
5079         return false;
5080     }
5081   } else if (Opc == ISD::SHL) {
5082     // (x & 0xff) << 8
5083     // (x & 0xff0000) << 8
5084     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5085       return false;
5086     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5087     if (!C || C->getZExtValue() != 8)
5088       return false;
5089   } else { // Opc == ISD::SRL
5090     // (x & 0xff00) >> 8
5091     // (x & 0xff000000) >> 8
5092     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5093       return false;
5094     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5095     if (!C || C->getZExtValue() != 8)
5096       return false;
5097   }
5098
5099   if (Parts[MaskByteOffset])
5100     return false;
5101
5102   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5103   return true;
5104 }
5105
5106 /// Match a 32-bit packed halfword bswap. That is
5107 /// ((x & 0x000000ff) << 8) |
5108 /// ((x & 0x0000ff00) >> 8) |
5109 /// ((x & 0x00ff0000) << 8) |
5110 /// ((x & 0xff000000) >> 8)
5111 /// => (rotl (bswap x), 16)
5112 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5113   if (!LegalOperations)
5114     return SDValue();
5115
5116   EVT VT = N->getValueType(0);
5117   if (VT != MVT::i32)
5118     return SDValue();
5119   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5120     return SDValue();
5121
5122   // Look for either
5123   // (or (or (and), (and)), (or (and), (and)))
5124   // (or (or (or (and), (and)), (and)), (and))
5125   if (N0.getOpcode() != ISD::OR)
5126     return SDValue();
5127   SDValue N00 = N0.getOperand(0);
5128   SDValue N01 = N0.getOperand(1);
5129   SDNode *Parts[4] = {};
5130
5131   if (N1.getOpcode() == ISD::OR &&
5132       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5133     // (or (or (and), (and)), (or (and), (and)))
5134     if (!isBSwapHWordElement(N00, Parts))
5135       return SDValue();
5136
5137     if (!isBSwapHWordElement(N01, Parts))
5138       return SDValue();
5139     SDValue N10 = N1.getOperand(0);
5140     if (!isBSwapHWordElement(N10, Parts))
5141       return SDValue();
5142     SDValue N11 = N1.getOperand(1);
5143     if (!isBSwapHWordElement(N11, Parts))
5144       return SDValue();
5145   } else {
5146     // (or (or (or (and), (and)), (and)), (and))
5147     if (!isBSwapHWordElement(N1, Parts))
5148       return SDValue();
5149     if (!isBSwapHWordElement(N01, Parts))
5150       return SDValue();
5151     if (N00.getOpcode() != ISD::OR)
5152       return SDValue();
5153     SDValue N000 = N00.getOperand(0);
5154     if (!isBSwapHWordElement(N000, Parts))
5155       return SDValue();
5156     SDValue N001 = N00.getOperand(1);
5157     if (!isBSwapHWordElement(N001, Parts))
5158       return SDValue();
5159   }
5160
5161   // Make sure the parts are all coming from the same node.
5162   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5163     return SDValue();
5164
5165   SDLoc DL(N);
5166   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5167                               SDValue(Parts[0], 0));
5168
5169   // Result of the bswap should be rotated by 16. If it's not legal, then
5170   // do  (x << 16) | (x >> 16).
5171   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5172   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5173     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5174   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5175     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5176   return DAG.getNode(ISD::OR, DL, VT,
5177                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5178                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5179 }
5180
5181 /// This contains all DAGCombine rules which reduce two values combined by
5182 /// an Or operation to a single value \see visitANDLike().
5183 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5184   EVT VT = N1.getValueType();
5185   SDLoc DL(N);
5186
5187   // fold (or x, undef) -> -1
5188   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5189     return DAG.getAllOnesConstant(DL, VT);
5190
5191   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5192     return V;
5193
5194   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5195   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5196       // Don't increase # computations.
5197       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5198     // We can only do this xform if we know that bits from X that are set in C2
5199     // but not in C1 are already zero.  Likewise for Y.
5200     if (const ConstantSDNode *N0O1C =
5201         getAsNonOpaqueConstant(N0.getOperand(1))) {
5202       if (const ConstantSDNode *N1O1C =
5203           getAsNonOpaqueConstant(N1.getOperand(1))) {
5204         // We can only do this xform if we know that bits from X that are set in
5205         // C2 but not in C1 are already zero.  Likewise for Y.
5206         const APInt &LHSMask = N0O1C->getAPIntValue();
5207         const APInt &RHSMask = N1O1C->getAPIntValue();
5208
5209         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5210             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5211           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5212                                   N0.getOperand(0), N1.getOperand(0));
5213           return DAG.getNode(ISD::AND, DL, VT, X,
5214                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5215         }
5216       }
5217     }
5218   }
5219
5220   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5221   if (N0.getOpcode() == ISD::AND &&
5222       N1.getOpcode() == ISD::AND &&
5223       N0.getOperand(0) == N1.getOperand(0) &&
5224       // Don't increase # computations.
5225       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5226     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5227                             N0.getOperand(1), N1.getOperand(1));
5228     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5229   }
5230
5231   return SDValue();
5232 }
5233
5234 SDValue DAGCombiner::visitOR(SDNode *N) {
5235   SDValue N0 = N->getOperand(0);
5236   SDValue N1 = N->getOperand(1);
5237   EVT VT = N1.getValueType();
5238
5239   // x | x --> x
5240   if (N0 == N1)
5241     return N0;
5242
5243   // fold vector ops
5244   if (VT.isVector()) {
5245     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5246       return FoldedVOp;
5247
5248     // fold (or x, 0) -> x, vector edition
5249     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5250       return N1;
5251     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5252       return N0;
5253
5254     // fold (or x, -1) -> -1, vector edition
5255     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5256       // do not return N0, because undef node may exist in N0
5257       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5258     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5259       // do not return N1, because undef node may exist in N1
5260       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5261
5262     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5263     // Do this only if the resulting shuffle is legal.
5264     if (isa<ShuffleVectorSDNode>(N0) &&
5265         isa<ShuffleVectorSDNode>(N1) &&
5266         // Avoid folding a node with illegal type.
5267         TLI.isTypeLegal(VT)) {
5268       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5269       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5270       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5271       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5272       // Ensure both shuffles have a zero input.
5273       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5274         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5275         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5276         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5277         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5278         bool CanFold = true;
5279         int NumElts = VT.getVectorNumElements();
5280         SmallVector<int, 4> Mask(NumElts);
5281
5282         for (int i = 0; i != NumElts; ++i) {
5283           int M0 = SV0->getMaskElt(i);
5284           int M1 = SV1->getMaskElt(i);
5285
5286           // Determine if either index is pointing to a zero vector.
5287           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5288           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5289
5290           // If one element is zero and the otherside is undef, keep undef.
5291           // This also handles the case that both are undef.
5292           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5293             Mask[i] = -1;
5294             continue;
5295           }
5296
5297           // Make sure only one of the elements is zero.
5298           if (M0Zero == M1Zero) {
5299             CanFold = false;
5300             break;
5301           }
5302
5303           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5304
5305           // We have a zero and non-zero element. If the non-zero came from
5306           // SV0 make the index a LHS index. If it came from SV1, make it
5307           // a RHS index. We need to mod by NumElts because we don't care
5308           // which operand it came from in the original shuffles.
5309           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5310         }
5311
5312         if (CanFold) {
5313           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5314           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5315
5316           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5317           if (!LegalMask) {
5318             std::swap(NewLHS, NewRHS);
5319             ShuffleVectorSDNode::commuteMask(Mask);
5320             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5321           }
5322
5323           if (LegalMask)
5324             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5325         }
5326       }
5327     }
5328   }
5329
5330   // fold (or c1, c2) -> c1|c2
5331   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5332   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5333   if (N0C && N1C && !N1C->isOpaque())
5334     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5335   // canonicalize constant to RHS
5336   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5337      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5338     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5339   // fold (or x, 0) -> x
5340   if (isNullConstant(N1))
5341     return N0;
5342   // fold (or x, -1) -> -1
5343   if (isAllOnesConstant(N1))
5344     return N1;
5345
5346   if (SDValue NewSel = foldBinOpIntoSelect(N))
5347     return NewSel;
5348
5349   // fold (or x, c) -> c iff (x & ~c) == 0
5350   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5351     return N1;
5352
5353   if (SDValue Combined = visitORLike(N0, N1, N))
5354     return Combined;
5355
5356   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5357   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5358     return BSwap;
5359   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5360     return BSwap;
5361
5362   // reassociate or
5363   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5364     return ROR;
5365
5366   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5367   // iff (c1 & c2) != 0 or c1/c2 are undef.
5368   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5369     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5370   };
5371   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5372       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5373     if (SDValue COR = DAG.FoldConstantArithmetic(
5374             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5375       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5376       AddToWorklist(IOR.getNode());
5377       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5378     }
5379   }
5380
5381   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5382   if (N0.getOpcode() == N1.getOpcode())
5383     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5384       return V;
5385
5386   // See if this is some rotate idiom.
5387   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5388     return SDValue(Rot, 0);
5389
5390   if (SDValue Load = MatchLoadCombine(N))
5391     return Load;
5392
5393   // Simplify the operands using demanded-bits information.
5394   if (SimplifyDemandedBits(SDValue(N, 0)))
5395     return SDValue(N, 0);
5396
5397   return SDValue();
5398 }
5399
5400 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5401   if (Op.getOpcode() == ISD::AND &&
5402       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5403     Mask = Op.getOperand(1);
5404     return Op.getOperand(0);
5405   }
5406   return Op;
5407 }
5408
5409 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5410 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5411                             SDValue &Mask) {
5412   Op = stripConstantMask(DAG, Op, Mask);
5413   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5414     Shift = Op;
5415     return true;
5416   }
5417   return false;
5418 }
5419
5420 /// Helper function for visitOR to extract the needed side of a rotate idiom
5421 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5422 /// InstCombine merged some outside op with one of the shifts from
5423 /// the rotate pattern.
5424 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5425 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5426 /// patterns:
5427 ///
5428 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5429 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5430 ///
5431 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5432 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5433 ///
5434 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5435 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5436 ///
5437 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5438 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5439 ///
5440 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5441 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5442                                      SDValue ExtractFrom, SDValue &Mask,
5443                                      const SDLoc &DL) {
5444   assert(OppShift && ExtractFrom && "Empty SDValue");
5445   assert(
5446       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5447       "Existing shift must be valid as a rotate half");
5448
5449   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5450   // Preconditions:
5451   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5452   //
5453   // Find opcode of the needed shift to be extracted from (op0 v c0).
5454   unsigned Opcode = ISD::DELETED_NODE;
5455   bool IsMulOrDiv = false;
5456   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5457   // opcode or its arithmetic (mul or udiv) variant.
5458   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5459     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5460     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5461       return false;
5462     Opcode = NeededShift;
5463     return true;
5464   };
5465   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5466   // that the needed shift can be extracted from.
5467   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5468       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5469     return SDValue();
5470
5471   // op0 must be the same opcode on both sides, have the same LHS argument,
5472   // and produce the same value type.
5473   SDValue OppShiftLHS = OppShift.getOperand(0);
5474   EVT ShiftedVT = OppShiftLHS.getValueType();
5475   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5476       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5477       ShiftedVT != ExtractFrom.getValueType())
5478     return SDValue();
5479
5480   // Amount of the existing shift.
5481   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5482   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5483   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5484   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5485   ConstantSDNode *ExtractFromCst =
5486       isConstOrConstSplat(ExtractFrom.getOperand(1));
5487   // TODO: We should be able to handle non-uniform constant vectors for these values
5488   // Check that we have constant values.
5489   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5490       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5491       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5492     return SDValue();
5493
5494   // Compute the shift amount we need to extract to complete the rotate.
5495   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5496   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5497     return SDValue();
5498   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5499   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5500   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5501   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5502   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5503
5504   // Now try extract the needed shift from the ExtractFrom op and see if the
5505   // result matches up with the existing shift's LHS op.
5506   if (IsMulOrDiv) {
5507     // Op to extract from is a mul or udiv by a constant.
5508     // Check:
5509     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5510     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5511     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5512                                                  NeededShiftAmt.getZExtValue());
5513     APInt ResultAmt;
5514     APInt Rem;
5515     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5516     if (Rem != 0 || ResultAmt != OppLHSAmt)
5517       return SDValue();
5518   } else {
5519     // Op to extract from is a shift by a constant.
5520     // Check:
5521     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5522     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5523                                           ExtractFromAmt.getBitWidth()))
5524       return SDValue();
5525   }
5526
5527   // Return the expanded shift op that should allow a rotate to be formed.
5528   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5529   EVT ResVT = ExtractFrom.getValueType();
5530   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5531   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5532 }
5533
5534 // Return true if we can prove that, whenever Neg and Pos are both in the
5535 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5536 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5537 //
5538 //     (or (shift1 X, Neg), (shift2 X, Pos))
5539 //
5540 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5541 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5542 // to consider shift amounts with defined behavior.
5543 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5544                            SelectionDAG &DAG) {
5545   // If EltSize is a power of 2 then:
5546   //
5547   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5548   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5549   //
5550   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5551   // for the stronger condition:
5552   //
5553   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
5554   //
5555   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5556   // we can just replace Neg with Neg' for the rest of the function.
5557   //
5558   // In other cases we check for the even stronger condition:
5559   //
5560   //     Neg == EltSize - Pos                                    [B]
5561   //
5562   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
5563   // behavior if Pos == 0 (and consequently Neg == EltSize).
5564   //
5565   // We could actually use [A] whenever EltSize is a power of 2, but the
5566   // only extra cases that it would match are those uninteresting ones
5567   // where Neg and Pos are never in range at the same time.  E.g. for
5568   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5569   // as well as (sub 32, Pos), but:
5570   //
5571   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5572   //
5573   // always invokes undefined behavior for 32-bit X.
5574   //
5575   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5576   unsigned MaskLoBits = 0;
5577   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5578     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5579       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5580       unsigned Bits = Log2_64(EltSize);
5581       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5582           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5583         Neg = Neg.getOperand(0);
5584         MaskLoBits = Bits;
5585       }
5586     }
5587   }
5588
5589   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5590   if (Neg.getOpcode() != ISD::SUB)
5591     return false;
5592   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5593   if (!NegC)
5594     return false;
5595   SDValue NegOp1 = Neg.getOperand(1);
5596
5597   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5598   // Pos'.  The truncation is redundant for the purpose of the equality.
5599   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5600     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5601       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5602       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5603           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5604            MaskLoBits))
5605         Pos = Pos.getOperand(0);
5606     }
5607   }
5608
5609   // The condition we need is now:
5610   //
5611   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5612   //
5613   // If NegOp1 == Pos then we need:
5614   //
5615   //              EltSize & Mask == NegC & Mask
5616   //
5617   // (because "x & Mask" is a truncation and distributes through subtraction).
5618   APInt Width;
5619   if (Pos == NegOp1)
5620     Width = NegC->getAPIntValue();
5621
5622   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5623   // Then the condition we want to prove becomes:
5624   //
5625   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5626   //
5627   // which, again because "x & Mask" is a truncation, becomes:
5628   //
5629   //                NegC & Mask == (EltSize - PosC) & Mask
5630   //             EltSize & Mask == (NegC + PosC) & Mask
5631   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5632     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5633       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5634     else
5635       return false;
5636   } else
5637     return false;
5638
5639   // Now we just need to check that EltSize & Mask == Width & Mask.
5640   if (MaskLoBits)
5641     // EltSize & Mask is 0 since Mask is EltSize - 1.
5642     return Width.getLoBits(MaskLoBits) == 0;
5643   return Width == EltSize;
5644 }
5645
5646 // A subroutine of MatchRotate used once we have found an OR of two opposite
5647 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5648 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5649 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5650 // Neg with outer conversions stripped away.
5651 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5652                                        SDValue Neg, SDValue InnerPos,
5653                                        SDValue InnerNeg, unsigned PosOpcode,
5654                                        unsigned NegOpcode, const SDLoc &DL) {
5655   // fold (or (shl x, (*ext y)),
5656   //          (srl x, (*ext (sub 32, y)))) ->
5657   //   (rotl x, y) or (rotr x, (sub 32, y))
5658   //
5659   // fold (or (shl x, (*ext (sub 32, y))),
5660   //          (srl x, (*ext y))) ->
5661   //   (rotr x, y) or (rotl x, (sub 32, y))
5662   EVT VT = Shifted.getValueType();
5663   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5664     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5665     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5666                        HasPos ? Pos : Neg).getNode();
5667   }
5668
5669   return nullptr;
5670 }
5671
5672 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
5673 // idioms for rotate, and if the target supports rotation instructions, generate
5674 // a rot[lr].
5675 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5676   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
5677   EVT VT = LHS.getValueType();
5678   if (!TLI.isTypeLegal(VT)) return nullptr;
5679
5680   // The target must have at least one rotate flavor.
5681   bool HasROTL = hasOperation(ISD::ROTL, VT);
5682   bool HasROTR = hasOperation(ISD::ROTR, VT);
5683   if (!HasROTL && !HasROTR) return nullptr;
5684
5685   // Check for truncated rotate.
5686   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5687       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5688     assert(LHS.getValueType() == RHS.getValueType());
5689     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5690       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5691                          SDValue(Rot, 0)).getNode();
5692     }
5693   }
5694
5695   // Match "(X shl/srl V1) & V2" where V2 may not be present.
5696   SDValue LHSShift;   // The shift.
5697   SDValue LHSMask;    // AND value if any.
5698   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5699
5700   SDValue RHSShift;   // The shift.
5701   SDValue RHSMask;    // AND value if any.
5702   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5703
5704   // If neither side matched a rotate half, bail
5705   if (!LHSShift && !RHSShift)
5706     return nullptr;
5707
5708   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5709   // side of the rotate, so try to handle that here. In all cases we need to
5710   // pass the matched shift from the opposite side to compute the opcode and
5711   // needed shift amount to extract.  We still want to do this if both sides
5712   // matched a rotate half because one half may be a potential overshift that
5713   // can be broken down (ie if InstCombine merged two shl or srl ops into a
5714   // single one).
5715
5716   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5717   if (LHSShift)
5718     if (SDValue NewRHSShift =
5719             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5720       RHSShift = NewRHSShift;
5721   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5722   if (RHSShift)
5723     if (SDValue NewLHSShift =
5724             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5725       LHSShift = NewLHSShift;
5726
5727   // If a side is still missing, nothing else we can do.
5728   if (!RHSShift || !LHSShift)
5729     return nullptr;
5730
5731   // At this point we've matched or extracted a shift op on each side.
5732
5733   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5734     return nullptr;   // Not shifting the same value.
5735
5736   if (LHSShift.getOpcode() == RHSShift.getOpcode())
5737     return nullptr;   // Shifts must disagree.
5738
5739   // Canonicalize shl to left side in a shl/srl pair.
5740   if (RHSShift.getOpcode() == ISD::SHL) {
5741     std::swap(LHS, RHS);
5742     std::swap(LHSShift, RHSShift);
5743     std::swap(LHSMask, RHSMask);
5744   }
5745
5746   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5747   SDValue LHSShiftArg = LHSShift.getOperand(0);
5748   SDValue LHSShiftAmt = LHSShift.getOperand(1);
5749   SDValue RHSShiftArg = RHSShift.getOperand(0);
5750   SDValue RHSShiftAmt = RHSShift.getOperand(1);
5751
5752   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5753   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5754   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5755                                         ConstantSDNode *RHS) {
5756     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5757   };
5758   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5759     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5760                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5761
5762     // If there is an AND of either shifted operand, apply it to the result.
5763     if (LHSMask.getNode() || RHSMask.getNode()) {
5764       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5765       SDValue Mask = AllOnes;
5766
5767       if (LHSMask.getNode()) {
5768         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5769         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5770                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5771       }
5772       if (RHSMask.getNode()) {
5773         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5774         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5775                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5776       }
5777
5778       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5779     }
5780
5781     return Rot.getNode();
5782   }
5783
5784   // If there is a mask here, and we have a variable shift, we can't be sure
5785   // that we're masking out the right stuff.
5786   if (LHSMask.getNode() || RHSMask.getNode())
5787     return nullptr;
5788
5789   // If the shift amount is sign/zext/any-extended just peel it off.
5790   SDValue LExtOp0 = LHSShiftAmt;
5791   SDValue RExtOp0 = RHSShiftAmt;
5792   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5793        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5794        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5795        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5796       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5797        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5798        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5799        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5800     LExtOp0 = LHSShiftAmt.getOperand(0);
5801     RExtOp0 = RHSShiftAmt.getOperand(0);
5802   }
5803
5804   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5805                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5806   if (TryL)
5807     return TryL;
5808
5809   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5810                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5811   if (TryR)
5812     return TryR;
5813
5814   return nullptr;
5815 }
5816
5817 namespace {
5818
5819 /// Represents known origin of an individual byte in load combine pattern. The
5820 /// value of the byte is either constant zero or comes from memory.
5821 struct ByteProvider {
5822   // For constant zero providers Load is set to nullptr. For memory providers
5823   // Load represents the node which loads the byte from memory.
5824   // ByteOffset is the offset of the byte in the value produced by the load.
5825   LoadSDNode *Load = nullptr;
5826   unsigned ByteOffset = 0;
5827
5828   ByteProvider() = default;
5829
5830   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5831     return ByteProvider(Load, ByteOffset);
5832   }
5833
5834   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5835
5836   bool isConstantZero() const { return !Load; }
5837   bool isMemory() const { return Load; }
5838
5839   bool operator==(const ByteProvider &Other) const {
5840     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5841   }
5842
5843 private:
5844   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5845       : Load(Load), ByteOffset(ByteOffset) {}
5846 };
5847
5848 } // end anonymous namespace
5849
5850 /// Recursively traverses the expression calculating the origin of the requested
5851 /// byte of the given value. Returns None if the provider can't be calculated.
5852 ///
5853 /// For all the values except the root of the expression verifies that the value
5854 /// has exactly one use and if it's not true return None. This way if the origin
5855 /// of the byte is returned it's guaranteed that the values which contribute to
5856 /// the byte are not used outside of this expression.
5857 ///
5858 /// Because the parts of the expression are not allowed to have more than one
5859 /// use this function iterates over trees, not DAGs. So it never visits the same
5860 /// node more than once.
5861 static const Optional<ByteProvider>
5862 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5863                       bool Root = false) {
5864   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5865   if (Depth == 10)
5866     return None;
5867
5868   if (!Root && !Op.hasOneUse())
5869     return None;
5870
5871   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5872   unsigned BitWidth = Op.getValueSizeInBits();
5873   if (BitWidth % 8 != 0)
5874     return None;
5875   unsigned ByteWidth = BitWidth / 8;
5876   assert(Index < ByteWidth && "invalid index requested");
5877   (void) ByteWidth;
5878
5879   switch (Op.getOpcode()) {
5880   case ISD::OR: {
5881     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5882     if (!LHS)
5883       return None;
5884     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5885     if (!RHS)
5886       return None;
5887
5888     if (LHS->isConstantZero())
5889       return RHS;
5890     if (RHS->isConstantZero())
5891       return LHS;
5892     return None;
5893   }
5894   case ISD::SHL: {
5895     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5896     if (!ShiftOp)
5897       return None;
5898
5899     uint64_t BitShift = ShiftOp->getZExtValue();
5900     if (BitShift % 8 != 0)
5901       return None;
5902     uint64_t ByteShift = BitShift / 8;
5903
5904     return Index < ByteShift
5905                ? ByteProvider::getConstantZero()
5906                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5907                                        Depth + 1);
5908   }
5909   case ISD::ANY_EXTEND:
5910   case ISD::SIGN_EXTEND:
5911   case ISD::ZERO_EXTEND: {
5912     SDValue NarrowOp = Op->getOperand(0);
5913     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5914     if (NarrowBitWidth % 8 != 0)
5915       return None;
5916     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5917
5918     if (Index >= NarrowByteWidth)
5919       return Op.getOpcode() == ISD::ZERO_EXTEND
5920                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5921                  : None;
5922     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5923   }
5924   case ISD::BSWAP:
5925     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5926                                  Depth + 1);
5927   case ISD::LOAD: {
5928     auto L = cast<LoadSDNode>(Op.getNode());
5929     if (L->isVolatile() || L->isIndexed())
5930       return None;
5931
5932     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5933     if (NarrowBitWidth % 8 != 0)
5934       return None;
5935     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5936
5937     if (Index >= NarrowByteWidth)
5938       return L->getExtensionType() == ISD::ZEXTLOAD
5939                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5940                  : None;
5941     return ByteProvider::getMemory(L, Index);
5942   }
5943   }
5944
5945   return None;
5946 }
5947
5948 /// Match a pattern where a wide type scalar value is loaded by several narrow
5949 /// loads and combined by shifts and ors. Fold it into a single load or a load
5950 /// and a BSWAP if the target supports it.
5951 ///
5952 /// Assuming little endian target:
5953 ///  i8 *a = ...
5954 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5955 /// =>
5956 ///  i32 val = *((i32)a)
5957 ///
5958 ///  i8 *a = ...
5959 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5960 /// =>
5961 ///  i32 val = BSWAP(*((i32)a))
5962 ///
5963 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5964 /// interact well with the worklist mechanism. When a part of the pattern is
5965 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5966 /// but the root node of the pattern which triggers the load combine is not
5967 /// necessarily a direct user of the changed node. For example, once the address
5968 /// of t28 load is reassociated load combine won't be triggered:
5969 ///             t25: i32 = add t4, Constant:i32<2>
5970 ///           t26: i64 = sign_extend t25
5971 ///        t27: i64 = add t2, t26
5972 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5973 ///     t29: i32 = zero_extend t28
5974 ///   t32: i32 = shl t29, Constant:i8<8>
5975 /// t33: i32 = or t23, t32
5976 /// As a possible fix visitLoad can check if the load can be a part of a load
5977 /// combine pattern and add corresponding OR roots to the worklist.
5978 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5979   assert(N->getOpcode() == ISD::OR &&
5980          "Can only match load combining against OR nodes");
5981
5982   // Handles simple types only
5983   EVT VT = N->getValueType(0);
5984   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5985     return SDValue();
5986   unsigned ByteWidth = VT.getSizeInBits() / 8;
5987
5988   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5989   // Before legalize we can introduce too wide illegal loads which will be later
5990   // split into legal sized loads. This enables us to combine i64 load by i8
5991   // patterns to a couple of i32 loads on 32 bit targets.
5992   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5993     return SDValue();
5994
5995   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5996     unsigned BW, unsigned i) { return i; };
5997   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5998     unsigned BW, unsigned i) { return BW - i - 1; };
5999
6000   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6001   auto MemoryByteOffset = [&] (ByteProvider P) {
6002     assert(P.isMemory() && "Must be a memory byte provider");
6003     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6004     assert(LoadBitWidth % 8 == 0 &&
6005            "can only analyze providers for individual bytes not bit");
6006     unsigned LoadByteWidth = LoadBitWidth / 8;
6007     return IsBigEndianTarget
6008             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6009             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6010   };
6011
6012   Optional<BaseIndexOffset> Base;
6013   SDValue Chain;
6014
6015   SmallPtrSet<LoadSDNode *, 8> Loads;
6016   Optional<ByteProvider> FirstByteProvider;
6017   int64_t FirstOffset = INT64_MAX;
6018
6019   // Check if all the bytes of the OR we are looking at are loaded from the same
6020   // base address. Collect bytes offsets from Base address in ByteOffsets.
6021   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6022   for (unsigned i = 0; i < ByteWidth; i++) {
6023     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6024     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6025       return SDValue();
6026
6027     LoadSDNode *L = P->Load;
6028     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6029            "Must be enforced by calculateByteProvider");
6030     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6031
6032     // All loads must share the same chain
6033     SDValue LChain = L->getChain();
6034     if (!Chain)
6035       Chain = LChain;
6036     else if (Chain != LChain)
6037       return SDValue();
6038
6039     // Loads must share the same base address
6040     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6041     int64_t ByteOffsetFromBase = 0;
6042     if (!Base)
6043       Base = Ptr;
6044     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6045       return SDValue();
6046
6047     // Calculate the offset of the current byte from the base address
6048     ByteOffsetFromBase += MemoryByteOffset(*P);
6049     ByteOffsets[i] = ByteOffsetFromBase;
6050
6051     // Remember the first byte load
6052     if (ByteOffsetFromBase < FirstOffset) {
6053       FirstByteProvider = P;
6054       FirstOffset = ByteOffsetFromBase;
6055     }
6056
6057     Loads.insert(L);
6058   }
6059   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6060          "memory, so there must be at least one load which produces the value");
6061   assert(Base && "Base address of the accessed memory location must be set");
6062   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6063
6064   // Check if the bytes of the OR we are looking at match with either big or
6065   // little endian value load
6066   bool BigEndian = true, LittleEndian = true;
6067   for (unsigned i = 0; i < ByteWidth; i++) {
6068     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6069     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
6070     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
6071     if (!BigEndian && !LittleEndian)
6072       return SDValue();
6073   }
6074   assert((BigEndian != LittleEndian) && "should be either or");
6075   assert(FirstByteProvider && "must be set");
6076
6077   // Ensure that the first byte is loaded from zero offset of the first load.
6078   // So the combined value can be loaded from the first load address.
6079   if (MemoryByteOffset(*FirstByteProvider) != 0)
6080     return SDValue();
6081   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6082
6083   // The node we are looking at matches with the pattern, check if we can
6084   // replace it with a single load and bswap if needed.
6085
6086   // If the load needs byte swap check if the target supports it
6087   bool NeedsBswap = IsBigEndianTarget != BigEndian;
6088
6089   // Before legalize we can introduce illegal bswaps which will be later
6090   // converted to an explicit bswap sequence. This way we end up with a single
6091   // load and byte shuffling instead of several loads and byte shuffling.
6092   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6093     return SDValue();
6094
6095   // Check that a load of the wide type is both allowed and fast on the target
6096   bool Fast = false;
6097   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6098                                         VT, FirstLoad->getAddressSpace(),
6099                                         FirstLoad->getAlignment(), &Fast);
6100   if (!Allowed || !Fast)
6101     return SDValue();
6102
6103   SDValue NewLoad =
6104       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6105                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6106
6107   // Transfer chain users from old loads to the new load.
6108   for (LoadSDNode *L : Loads)
6109     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6110
6111   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6112 }
6113
6114 // If the target has andn, bsl, or a similar bit-select instruction,
6115 // we want to unfold masked merge, with canonical pattern of:
6116 //   |        A  |  |B|
6117 //   ((x ^ y) & m) ^ y
6118 //    |  D  |
6119 // Into:
6120 //   (x & m) | (y & ~m)
6121 // If y is a constant, and the 'andn' does not work with immediates,
6122 // we unfold into a different pattern:
6123 //   ~(~x & m) & (m | y)
6124 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6125 //       the very least that breaks andnpd / andnps patterns, and because those
6126 //       patterns are simplified in IR and shouldn't be created in the DAG
6127 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6128   assert(N->getOpcode() == ISD::XOR);
6129
6130   // Don't touch 'not' (i.e. where y = -1).
6131   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6132     return SDValue();
6133
6134   EVT VT = N->getValueType(0);
6135
6136   // There are 3 commutable operators in the pattern,
6137   // so we have to deal with 8 possible variants of the basic pattern.
6138   SDValue X, Y, M;
6139   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6140     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6141       return false;
6142     SDValue Xor = And.getOperand(XorIdx);
6143     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6144       return false;
6145     SDValue Xor0 = Xor.getOperand(0);
6146     SDValue Xor1 = Xor.getOperand(1);
6147     // Don't touch 'not' (i.e. where y = -1).
6148     if (isAllOnesOrAllOnesSplat(Xor1))
6149       return false;
6150     if (Other == Xor0)
6151       std::swap(Xor0, Xor1);
6152     if (Other != Xor1)
6153       return false;
6154     X = Xor0;
6155     Y = Xor1;
6156     M = And.getOperand(XorIdx ? 0 : 1);
6157     return true;
6158   };
6159
6160   SDValue N0 = N->getOperand(0);
6161   SDValue N1 = N->getOperand(1);
6162   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6163       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6164     return SDValue();
6165
6166   // Don't do anything if the mask is constant. This should not be reachable.
6167   // InstCombine should have already unfolded this pattern, and DAGCombiner
6168   // probably shouldn't produce it, too.
6169   if (isa<ConstantSDNode>(M.getNode()))
6170     return SDValue();
6171
6172   // We can transform if the target has AndNot
6173   if (!TLI.hasAndNot(M))
6174     return SDValue();
6175
6176   SDLoc DL(N);
6177
6178   // If Y is a constant, check that 'andn' works with immediates.
6179   if (!TLI.hasAndNot(Y)) {
6180     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6181     // If not, we need to do a bit more work to make sure andn is still used.
6182     SDValue NotX = DAG.getNOT(DL, X, VT);
6183     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6184     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6185     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6186     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6187   }
6188
6189   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6190   SDValue NotM = DAG.getNOT(DL, M, VT);
6191   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6192
6193   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6194 }
6195
6196 SDValue DAGCombiner::visitXOR(SDNode *N) {
6197   SDValue N0 = N->getOperand(0);
6198   SDValue N1 = N->getOperand(1);
6199   EVT VT = N0.getValueType();
6200
6201   // fold vector ops
6202   if (VT.isVector()) {
6203     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6204       return FoldedVOp;
6205
6206     // fold (xor x, 0) -> x, vector edition
6207     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6208       return N1;
6209     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6210       return N0;
6211   }
6212
6213   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6214   SDLoc DL(N);
6215   if (N0.isUndef() && N1.isUndef())
6216     return DAG.getConstant(0, DL, VT);
6217   // fold (xor x, undef) -> undef
6218   if (N0.isUndef())
6219     return N0;
6220   if (N1.isUndef())
6221     return N1;
6222   // fold (xor c1, c2) -> c1^c2
6223   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6224   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6225   if (N0C && N1C)
6226     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6227   // canonicalize constant to RHS
6228   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6229      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6230     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6231   // fold (xor x, 0) -> x
6232   if (isNullConstant(N1))
6233     return N0;
6234
6235   if (SDValue NewSel = foldBinOpIntoSelect(N))
6236     return NewSel;
6237
6238   // reassociate xor
6239   if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6240     return RXOR;
6241
6242   // fold !(x cc y) -> (x !cc y)
6243   unsigned N0Opcode = N0.getOpcode();
6244   SDValue LHS, RHS, CC;
6245   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6246     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6247                                                LHS.getValueType().isInteger());
6248     if (!LegalOperations ||
6249         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6250       switch (N0Opcode) {
6251       default:
6252         llvm_unreachable("Unhandled SetCC Equivalent!");
6253       case ISD::SETCC:
6254         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6255       case ISD::SELECT_CC:
6256         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6257                                N0.getOperand(3), NotCC);
6258       }
6259     }
6260   }
6261
6262   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6263   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6264       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6265     SDValue V = N0.getOperand(0);
6266     SDLoc DL0(N0);
6267     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6268                     DAG.getConstant(1, DL0, V.getValueType()));
6269     AddToWorklist(V.getNode());
6270     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6271   }
6272
6273   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6274   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6275       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6276     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6277     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6278       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6279       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6280       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6281       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6282       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6283     }
6284   }
6285   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6286   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6287       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6288     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6289     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6290       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6291       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6292       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6293       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6294       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6295     }
6296   }
6297   // fold (xor (and x, y), y) -> (and (not x), y)
6298   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6299     SDValue X = N0.getOperand(0);
6300     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6301     AddToWorklist(NotX.getNode());
6302     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6303   }
6304
6305   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6306     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6307     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6308     unsigned BitWidth = VT.getScalarSizeInBits();
6309     if (XorC && ShiftC) {
6310       // Don't crash on an oversized shift. We can not guarantee that a bogus
6311       // shift has been simplified to undef.
6312       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6313       if (ShiftAmt < BitWidth) {
6314         APInt Ones = APInt::getAllOnesValue(BitWidth);
6315         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6316         if (XorC->getAPIntValue() == Ones) {
6317           // If the xor constant is a shifted -1, do a 'not' before the shift:
6318           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6319           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6320           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6321           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6322         }
6323       }
6324     }
6325   }
6326
6327   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6328   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6329     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6330     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6331     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6332       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6333       SDValue S0 = S.getOperand(0);
6334       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6335         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6336         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6337           if (C->getAPIntValue() == (OpSizeInBits - 1))
6338             return DAG.getNode(ISD::ABS, DL, VT, S0);
6339       }
6340     }
6341   }
6342
6343   // fold (xor x, x) -> 0
6344   if (N0 == N1)
6345     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6346
6347   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6348   // Here is a concrete example of this equivalence:
6349   // i16   x ==  14
6350   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
6351   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6352   //
6353   // =>
6354   //
6355   // i16     ~1      == 0b1111111111111110
6356   // i16 rol(~1, 14) == 0b1011111111111111
6357   //
6358   // Some additional tips to help conceptualize this transform:
6359   // - Try to see the operation as placing a single zero in a value of all ones.
6360   // - There exists no value for x which would allow the result to contain zero.
6361   // - Values of x larger than the bitwidth are undefined and do not require a
6362   //   consistent result.
6363   // - Pushing the zero left requires shifting one bits in from the right.
6364   // A rotate left of ~1 is a nice way of achieving the desired result.
6365   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6366       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6367     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6368                        N0.getOperand(1));
6369   }
6370
6371   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
6372   if (N0Opcode == N1.getOpcode())
6373     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6374       return V;
6375
6376   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
6377   if (SDValue MM = unfoldMaskedMerge(N))
6378     return MM;
6379
6380   // Simplify the expression using non-local knowledge.
6381   if (SimplifyDemandedBits(SDValue(N, 0)))
6382     return SDValue(N, 0);
6383
6384   return SDValue();
6385 }
6386
6387 /// Handle transforms common to the three shifts, when the shift amount is a
6388 /// constant.
6389 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6390   // Do not turn a 'not' into a regular xor.
6391   if (isBitwiseNot(N->getOperand(0)))
6392     return SDValue();
6393
6394   SDNode *LHS = N->getOperand(0).getNode();
6395   if (!LHS->hasOneUse()) return SDValue();
6396
6397   // We want to pull some binops through shifts, so that we have (and (shift))
6398   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
6399   // thing happens with address calculations, so it's important to canonicalize
6400   // it.
6401   bool HighBitSet = false;  // Can we transform this if the high bit is set?
6402
6403   switch (LHS->getOpcode()) {
6404   default: return SDValue();
6405   case ISD::OR:
6406   case ISD::XOR:
6407     HighBitSet = false; // We can only transform sra if the high bit is clear.
6408     break;
6409   case ISD::AND:
6410     HighBitSet = true;  // We can only transform sra if the high bit is set.
6411     break;
6412   case ISD::ADD:
6413     if (N->getOpcode() != ISD::SHL)
6414       return SDValue(); // only shl(add) not sr[al](add).
6415     HighBitSet = false; // We can only transform sra if the high bit is clear.
6416     break;
6417   }
6418
6419   // We require the RHS of the binop to be a constant and not opaque as well.
6420   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6421   if (!BinOpCst) return SDValue();
6422
6423   // FIXME: disable this unless the input to the binop is a shift by a constant
6424   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
6425   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6426   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6427                  BinOpLHSVal->getOpcode() == ISD::SRA ||
6428                  BinOpLHSVal->getOpcode() == ISD::SRL;
6429   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6430                         BinOpLHSVal->getOpcode() == ISD::SELECT;
6431
6432   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6433       !isCopyOrSelect)
6434     return SDValue();
6435
6436   if (isCopyOrSelect && N->hasOneUse())
6437     return SDValue();
6438
6439   EVT VT = N->getValueType(0);
6440
6441   // If this is a signed shift right, and the high bit is modified by the
6442   // logical operation, do not perform the transformation. The highBitSet
6443   // boolean indicates the value of the high bit of the constant which would
6444   // cause it to be modified for this operation.
6445   if (N->getOpcode() == ISD::SRA) {
6446     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6447     if (BinOpRHSSignSet != HighBitSet)
6448       return SDValue();
6449   }
6450
6451   if (!TLI.isDesirableToCommuteWithShift(N, Level))
6452     return SDValue();
6453
6454   // Fold the constants, shifting the binop RHS by the shift amount.
6455   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6456                                N->getValueType(0),
6457                                LHS->getOperand(1), N->getOperand(1));
6458   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6459
6460   // Create the new shift.
6461   SDValue NewShift = DAG.getNode(N->getOpcode(),
6462                                  SDLoc(LHS->getOperand(0)),
6463                                  VT, LHS->getOperand(0), N->getOperand(1));
6464
6465   // Create the new binop.
6466   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6467 }
6468
6469 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6470   assert(N->getOpcode() == ISD::TRUNCATE);
6471   assert(N->getOperand(0).getOpcode() == ISD::AND);
6472
6473   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6474   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6475     SDValue N01 = N->getOperand(0).getOperand(1);
6476     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6477       SDLoc DL(N);
6478       EVT TruncVT = N->getValueType(0);
6479       SDValue N00 = N->getOperand(0).getOperand(0);
6480       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6481       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6482       AddToWorklist(Trunc00.getNode());
6483       AddToWorklist(Trunc01.getNode());
6484       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6485     }
6486   }
6487
6488   return SDValue();
6489 }
6490
6491 SDValue DAGCombiner::visitRotate(SDNode *N) {
6492   SDLoc dl(N);
6493   SDValue N0 = N->getOperand(0);
6494   SDValue N1 = N->getOperand(1);
6495   EVT VT = N->getValueType(0);
6496   unsigned Bitsize = VT.getScalarSizeInBits();
6497
6498   // fold (rot x, 0) -> x
6499   if (isNullOrNullSplat(N1))
6500     return N0;
6501
6502   // fold (rot x, c) -> x iff (c % BitSize) == 0
6503   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6504     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6505     if (DAG.MaskedValueIsZero(N1, ModuloMask))
6506       return N0;
6507   }
6508
6509   // fold (rot x, c) -> (rot x, c % BitSize)
6510   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6511     if (Cst->getAPIntValue().uge(Bitsize)) {
6512       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6513       return DAG.getNode(N->getOpcode(), dl, VT, N0,
6514                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
6515     }
6516   }
6517
6518   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6519   if (N1.getOpcode() == ISD::TRUNCATE &&
6520       N1.getOperand(0).getOpcode() == ISD::AND) {
6521     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6522       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6523   }
6524
6525   unsigned NextOp = N0.getOpcode();
6526   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6527   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6528     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6529     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6530     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6531       EVT ShiftVT = C1->getValueType(0);
6532       bool SameSide = (N->getOpcode() == NextOp);
6533       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6534       if (SDValue CombinedShift =
6535               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6536         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6537         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6538             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6539             BitsizeC.getNode());
6540         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6541                            CombinedShiftNorm);
6542       }
6543     }
6544   }
6545   return SDValue();
6546 }
6547
6548 SDValue DAGCombiner::visitSHL(SDNode *N) {
6549   SDValue N0 = N->getOperand(0);
6550   SDValue N1 = N->getOperand(1);
6551   if (SDValue V = DAG.simplifyShift(N0, N1))
6552     return V;
6553
6554   EVT VT = N0.getValueType();
6555   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6556
6557   // fold vector ops
6558   if (VT.isVector()) {
6559     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6560       return FoldedVOp;
6561
6562     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6563     // If setcc produces all-one true value then:
6564     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6565     if (N1CV && N1CV->isConstant()) {
6566       if (N0.getOpcode() == ISD::AND) {
6567         SDValue N00 = N0->getOperand(0);
6568         SDValue N01 = N0->getOperand(1);
6569         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6570
6571         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6572             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6573                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6574           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6575                                                      N01CV, N1CV))
6576             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6577         }
6578       }
6579     }
6580   }
6581
6582   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6583
6584   // fold (shl c1, c2) -> c1<<c2
6585   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6586   if (N0C && N1C && !N1C->isOpaque())
6587     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6588
6589   if (SDValue NewSel = foldBinOpIntoSelect(N))
6590     return NewSel;
6591
6592   // if (shl x, c) is known to be zero, return 0
6593   if (DAG.MaskedValueIsZero(SDValue(N, 0),
6594                             APInt::getAllOnesValue(OpSizeInBits)))
6595     return DAG.getConstant(0, SDLoc(N), VT);
6596   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6597   if (N1.getOpcode() == ISD::TRUNCATE &&
6598       N1.getOperand(0).getOpcode() == ISD::AND) {
6599     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6600       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6601   }
6602
6603   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6604     return SDValue(N, 0);
6605
6606   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
6607   if (N0.getOpcode() == ISD::SHL) {
6608     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6609                                           ConstantSDNode *RHS) {
6610       APInt c1 = LHS->getAPIntValue();
6611       APInt c2 = RHS->getAPIntValue();
6612       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6613       return (c1 + c2).uge(OpSizeInBits);
6614     };
6615     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6616       return DAG.getConstant(0, SDLoc(N), VT);
6617
6618     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6619                                        ConstantSDNode *RHS) {
6620       APInt c1 = LHS->getAPIntValue();
6621       APInt c2 = RHS->getAPIntValue();
6622       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6623       return (c1 + c2).ult(OpSizeInBits);
6624     };
6625     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6626       SDLoc DL(N);
6627       EVT ShiftVT = N1.getValueType();
6628       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6629       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6630     }
6631   }
6632
6633   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6634   // For this to be valid, the second form must not preserve any of the bits
6635   // that are shifted out by the inner shift in the first form.  This means
6636   // the outer shift size must be >= the number of bits added by the ext.
6637   // As a corollary, we don't care what kind of ext it is.
6638   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6639               N0.getOpcode() == ISD::ANY_EXTEND ||
6640               N0.getOpcode() == ISD::SIGN_EXTEND) &&
6641       N0.getOperand(0).getOpcode() == ISD::SHL) {
6642     SDValue N0Op0 = N0.getOperand(0);
6643     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6644       APInt c1 = N0Op0C1->getAPIntValue();
6645       APInt c2 = N1C->getAPIntValue();
6646       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6647
6648       EVT InnerShiftVT = N0Op0.getValueType();
6649       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6650       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6651         SDLoc DL(N0);
6652         APInt Sum = c1 + c2;
6653         if (Sum.uge(OpSizeInBits))
6654           return DAG.getConstant(0, DL, VT);
6655
6656         return DAG.getNode(
6657             ISD::SHL, DL, VT,
6658             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6659             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6660       }
6661     }
6662   }
6663
6664   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6665   // Only fold this if the inner zext has no other uses to avoid increasing
6666   // the total number of instructions.
6667   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6668       N0.getOperand(0).getOpcode() == ISD::SRL) {
6669     SDValue N0Op0 = N0.getOperand(0);
6670     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6671       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6672         uint64_t c1 = N0Op0C1->getZExtValue();
6673         uint64_t c2 = N1C->getZExtValue();
6674         if (c1 == c2) {
6675           SDValue NewOp0 = N0.getOperand(0);
6676           EVT CountVT = NewOp0.getOperand(1).getValueType();
6677           SDLoc DL(N);
6678           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6679                                        NewOp0,
6680                                        DAG.getConstant(c2, DL, CountVT));
6681           AddToWorklist(NewSHL.getNode());
6682           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6683         }
6684       }
6685     }
6686   }
6687
6688   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
6689   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
6690   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6691       N0->getFlags().hasExact()) {
6692     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6693       uint64_t C1 = N0C1->getZExtValue();
6694       uint64_t C2 = N1C->getZExtValue();
6695       SDLoc DL(N);
6696       if (C1 <= C2)
6697         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6698                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6699       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6700                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6701     }
6702   }
6703
6704   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6705   //                               (and (srl x, (sub c1, c2), MASK)
6706   // Only fold this if the inner shift has no other uses -- if it does, folding
6707   // this will increase the total number of instructions.
6708   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6709       TLI.shouldFoldShiftPairToMask(N, Level)) {
6710     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6711       uint64_t c1 = N0C1->getZExtValue();
6712       if (c1 < OpSizeInBits) {
6713         uint64_t c2 = N1C->getZExtValue();
6714         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6715         SDValue Shift;
6716         if (c2 > c1) {
6717           Mask <<= c2 - c1;
6718           SDLoc DL(N);
6719           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6720                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6721         } else {
6722           Mask.lshrInPlace(c1 - c2);
6723           SDLoc DL(N);
6724           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6725                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6726         }
6727         SDLoc DL(N0);
6728         return DAG.getNode(ISD::AND, DL, VT, Shift,
6729                            DAG.getConstant(Mask, DL, VT));
6730       }
6731     }
6732   }
6733
6734   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6735   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6736       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6737     SDLoc DL(N);
6738     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6739     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6740     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6741   }
6742
6743   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6744   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6745   // Variant of version done on multiply, except mul by a power of 2 is turned
6746   // into a shift.
6747   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6748       N0.getNode()->hasOneUse() &&
6749       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6750       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6751       TLI.isDesirableToCommuteWithShift(N, Level)) {
6752     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6753     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6754     AddToWorklist(Shl0.getNode());
6755     AddToWorklist(Shl1.getNode());
6756     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6757   }
6758
6759   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6760   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6761       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6762       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6763     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6764     if (isConstantOrConstantVector(Shl))
6765       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6766   }
6767
6768   if (N1C && !N1C->isOpaque())
6769     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6770       return NewSHL;
6771
6772   return SDValue();
6773 }
6774
/// Try to simplify the arithmetic-shift-right node \p N.
///
/// The folds below are attempted in the order written and the first one that
/// applies wins. Returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedBits reports that it changed something, or an empty SDValue
/// when no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // Arithmetic shifting an all-sign-bit value is a no-op.
  // fold (sra 0, x) -> 0
  // fold (sra -1, x) -> -1
  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
    return N0;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Non-null only when the shift amount is a constant or a constant splat.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (sra c1, c2) -> c1 >>s c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // Width of the value that is being sign extended in place: the operand
    // width minus the common shift amount.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // clamp (add c1, c2) to max shift.
  if (N0.getOpcode() == ISD::SRA) {
    SDLoc DL(N);
    EVT ShiftVT = N1.getValueType();
    EVT ShiftSVT = ShiftVT.getScalarType();
    SmallVector<SDValue, 16> ShiftValues;

    // Appends the (clamped) sum of one pair of shift amounts to ShiftValues.
    // The amounts are widened by one bit first so the addition cannot wrap.
    // The callback never rejects a pair.
    auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      APInt Sum = c1 + c2;
      unsigned ShiftSum =
          Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
      ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
      return true;
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
      SDValue ShiftValue;
      if (VT.isVector())
        ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
      else
        ShiftValue = ShiftValues[0];
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n > m.
  // If truncate is free for the target sext(srl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts: N01C is the shl amount (m) and N1C
    // is the sra amount (result_size - n).
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  // The inner shift may be either srl or sra. Both the inner shift and its
  // shift-amount operand must have a single use.
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      // Only valid when the inner shift amount equals the width difference
      // between the wide type and the truncated type.
      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Shared shift-by-constant folds; skipped for opaque shift amounts.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
6933
/// Try to simplify the logical-shift-right node \p N.
///
/// The folds below are attempted in the order written and the first one that
/// applies wins. Returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedBits reports that it changed something, or an empty SDValue
/// when no fold applies. As a side effect, a BRCOND user of \p N (possibly
/// behind a single-use TRUNCATE) may be re-queued on the worklist.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue V = DAG.simplifyShift(N0, N1))
    return V;

  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // Non-null only when the shift amount is a constant or a constant splat.
  ConstantSDNode *N1C = isConstOrConstSplat(N1);

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N0.getOpcode() == ISD::SRL) {
    // True when c1 + c2 reaches or exceeds the operand width. The amounts are
    // widened by one bit first so the addition cannot wrap.
    auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
                                          ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).uge(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
      return DAG.getConstant(0, SDLoc(N), VT);

    // True when c1 + c2 is still a valid shift amount for the operand width.
    auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
                                       ConstantSDNode *RHS) {
      APInt c1 = LHS->getAPIntValue();
      APInt c2 = RHS->getAPIntValue();
      zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
      return (c1 + c2).ult(OpSizeInBits);
    };
    if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
      SDLoc DL(N);
      EVT ShiftVT = N1.getValueType();
      SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
      uint64_t c1 = N001C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0.getOperand(0).getValueType();
      EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if the OpSizeInBits + c1 = size of inner shift.
      if (c1 + OpSizeInBits == InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= InnerShiftSize)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                       N0.getOperand(0).getOperand(0),
                                       DAG.getConstant(c1 + c2, DL,
                                                       ShiftCountVT)));
      }
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // cst2 is built as (srl all-ones, c) and queued so it can be folded itself.
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
    SDLoc DL(N);
    SDValue Mask =
        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
    AddToWorklist(Mask.getNode());
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Keep only the low OpSizeInBits - ShiftAmt bits of the extended result.
      APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  // "5" stands for log2 of the operand width.
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~Known.Zero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      // Move the one possibly-set bit down to bit 0 (nothing to do if it is
      // already there).
      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Shared shift-by-constant folds; skipped for opaque shift amounts.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However, after the source operand of the SRL has been optimized into an
  // AND, the SRL itself may not be optimized further. Look for that case and
  // add the BRCOND to the worklist so it gets revisited.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
7142
7143 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7144   EVT VT = N->getValueType(0);
7145   SDValue N0 = N->getOperand(0);
7146   SDValue N1 = N->getOperand(1);
7147   SDValue N2 = N->getOperand(2);
7148   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7149   unsigned BitWidth = VT.getScalarSizeInBits();
7150
7151   // fold (fshl N0, N1, 0) -> N0
7152   // fold (fshr N0, N1, 0) -> N1
7153   if (isPowerOf2_32(BitWidth))
7154     if (DAG.MaskedValueIsZero(
7155             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7156       return IsFSHL ? N0 : N1;
7157
7158   auto IsUndefOrZero = [](SDValue V) {
7159     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7160   };
7161
7162   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7163     EVT ShAmtTy = N2.getValueType();
7164
7165     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7166     if (Cst->getAPIntValue().uge(BitWidth)) {
7167       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7168       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7169                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7170     }
7171
7172     unsigned ShAmt = Cst->getZExtValue();
7173     if (ShAmt == 0)
7174       return IsFSHL ? N0 : N1;
7175
7176     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7177     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7178     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7179     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7180     if (IsUndefOrZero(N0))
7181       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7182                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7183                                          SDLoc(N), ShAmtTy));
7184     if (IsUndefOrZero(N1))
7185       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7186                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7187                                          SDLoc(N), ShAmtTy));
7188   }
7189
7190   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7191   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7192   // iff We know the shift amount is in range.
7193   // TODO: when is it worth doing SUB(BW, N2) as well?
7194   if (isPowerOf2_32(BitWidth)) {
7195     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7196     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7197       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7198     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7199       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7200   }
7201
7202   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7203   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7204   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7205   // is legal as well we might be better off avoiding non-constant (BW - N2).
7206   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7207   if (N0 == N1 && hasOperation(RotOpc, VT))
7208     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7209
7210   // Simplify, based on bits shifted out of N0/N1.
7211   if (SimplifyDemandedBits(SDValue(N, 0)))
7212     return SDValue(N, 0);
7213
7214   return SDValue();
7215 }
7216
7217 SDValue DAGCombiner::visitABS(SDNode *N) {
7218   SDValue N0 = N->getOperand(0);
7219   EVT VT = N->getValueType(0);
7220
7221   // fold (abs c1) -> c2
7222   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7223     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7224   // fold (abs (abs x)) -> (abs x)
7225   if (N0.getOpcode() == ISD::ABS)
7226     return N0;
7227   // fold (abs x) -> x iff not-negative
7228   if (DAG.SignBitIsZero(N0))
7229     return N0;
7230   return SDValue();
7231 }
7232
7233 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7234   SDValue N0 = N->getOperand(0);
7235   EVT VT = N->getValueType(0);
7236
7237   // fold (bswap c1) -> c2
7238   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7239     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7240   // fold (bswap (bswap x)) -> x
7241   if (N0.getOpcode() == ISD::BSWAP)
7242     return N0->getOperand(0);
7243   return SDValue();
7244 }
7245
7246 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7247   SDValue N0 = N->getOperand(0);
7248   EVT VT = N->getValueType(0);
7249
7250   // fold (bitreverse c1) -> c2
7251   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7252     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7253   // fold (bitreverse (bitreverse x)) -> x
7254   if (N0.getOpcode() == ISD::BITREVERSE)
7255     return N0.getOperand(0);
7256   return SDValue();
7257 }
7258
7259 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7260   SDValue N0 = N->getOperand(0);
7261   EVT VT = N->getValueType(0);
7262
7263   // fold (ctlz c1) -> c2
7264   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7265     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7266
7267   // If the value is known never to be zero, switch to the undef version.
7268   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7269     if (DAG.isKnownNeverZero(N0))
7270       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7271   }
7272
7273   return SDValue();
7274 }
7275
7276 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7277   SDValue N0 = N->getOperand(0);
7278   EVT VT = N->getValueType(0);
7279
7280   // fold (ctlz_zero_undef c1) -> c2
7281   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7282     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7283   return SDValue();
7284 }
7285
7286 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7287   SDValue N0 = N->getOperand(0);
7288   EVT VT = N->getValueType(0);
7289
7290   // fold (cttz c1) -> c2
7291   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7292     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7293
7294   // If the value is known never to be zero, switch to the undef version.
7295   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7296     if (DAG.isKnownNeverZero(N0))
7297       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7298   }
7299
7300   return SDValue();
7301 }
7302
7303 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7304   SDValue N0 = N->getOperand(0);
7305   EVT VT = N->getValueType(0);
7306
7307   // fold (cttz_zero_undef c1) -> c2
7308   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7309     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7310   return SDValue();
7311 }
7312
7313 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7314   SDValue N0 = N->getOperand(0);
7315   EVT VT = N->getValueType(0);
7316
7317   // fold (ctpop c1) -> c2
7318   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7319     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7320   return SDValue();
7321 }
7322
7323 // FIXME: This should be checking for no signed zeros on individual operands, as
7324 // well as no nans.
7325 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
7326                                          SDValue RHS,
7327                                          const TargetLowering &TLI) {
7328   const TargetOptions &Options = DAG.getTarget().Options;
7329   EVT VT = LHS.getValueType();
7330
7331   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7332          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
7333          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7334 }
7335
7336 /// Generate Min/Max node
7337 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7338                                    SDValue RHS, SDValue True, SDValue False,
7339                                    ISD::CondCode CC, const TargetLowering &TLI,
7340                                    SelectionDAG &DAG) {
7341   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7342     return SDValue();
7343
7344   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7345   switch (CC) {
7346   case ISD::SETOLT:
7347   case ISD::SETOLE:
7348   case ISD::SETLT:
7349   case ISD::SETLE:
7350   case ISD::SETULT:
7351   case ISD::SETULE: {
7352     // Since it's known never nan to get here already, either fminnum or
7353     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
7354     // expanded in terms of it.
7355     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7356     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7357       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7358
7359     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7360     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7361       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7362     return SDValue();
7363   }
7364   case ISD::SETOGT:
7365   case ISD::SETOGE:
7366   case ISD::SETGT:
7367   case ISD::SETGE:
7368   case ISD::SETUGT:
7369   case ISD::SETUGE: {
7370     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7371     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7372       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7373
7374     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7375     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7376       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7377     return SDValue();
7378   }
7379   default:
7380     return SDValue();
7381   }
7382 }
7383
7384 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7385   SDValue Cond = N->getOperand(0);
7386   SDValue N1 = N->getOperand(1);
7387   SDValue N2 = N->getOperand(2);
7388   EVT VT = N->getValueType(0);
7389   EVT CondVT = Cond.getValueType();
7390   SDLoc DL(N);
7391
7392   if (!VT.isInteger())
7393     return SDValue();
7394
7395   auto *C1 = dyn_cast<ConstantSDNode>(N1);
7396   auto *C2 = dyn_cast<ConstantSDNode>(N2);
7397   if (!C1 || !C2)
7398     return SDValue();
7399
7400   // Only do this before legalization to avoid conflicting with target-specific
7401   // transforms in the other direction (create a select from a zext/sext). There
7402   // is also a target-independent combine here in DAGCombiner in the other
7403   // direction for (select Cond, -1, 0) when the condition is not i1.
7404   if (CondVT == MVT::i1 && !LegalOperations) {
7405     if (C1->isNullValue() && C2->isOne()) {
7406       // select Cond, 0, 1 --> zext (!Cond)
7407       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7408       if (VT != MVT::i1)
7409         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7410       return NotCond;
7411     }
7412     if (C1->isNullValue() && C2->isAllOnesValue()) {
7413       // select Cond, 0, -1 --> sext (!Cond)
7414       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7415       if (VT != MVT::i1)
7416         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7417       return NotCond;
7418     }
7419     if (C1->isOne() && C2->isNullValue()) {
7420       // select Cond, 1, 0 --> zext (Cond)
7421       if (VT != MVT::i1)
7422         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7423       return Cond;
7424     }
7425     if (C1->isAllOnesValue() && C2->isNullValue()) {
7426       // select Cond, -1, 0 --> sext (Cond)
7427       if (VT != MVT::i1)
7428         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7429       return Cond;
7430     }
7431
7432     // For any constants that differ by 1, we can transform the select into an
7433     // extend and add. Use a target hook because some targets may prefer to
7434     // transform in the other direction.
7435     if (TLI.convertSelectOfConstantsToMath(VT)) {
7436       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7437         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7438         if (VT != MVT::i1)
7439           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7440         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7441       }
7442       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7443         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7444         if (VT != MVT::i1)
7445           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7446         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7447       }
7448     }
7449
7450     return SDValue();
7451   }
7452
7453   // fold (select Cond, 0, 1) -> (xor Cond, 1)
7454   // We can't do this reliably if integer based booleans have different contents
7455   // to floating point based booleans. This is because we can't tell whether we
7456   // have an integer-based boolean or a floating-point-based boolean unless we
7457   // can find the SETCC that produced it and inspect its operands. This is
7458   // fairly easy if C is the SETCC node, but it can potentially be
7459   // undiscoverable (or not reasonably discoverable). For example, it could be
7460   // in another basic block or it could require searching a complicated
7461   // expression.
7462   if (CondVT.isInteger() &&
7463       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7464           TargetLowering::ZeroOrOneBooleanContent &&
7465       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7466           TargetLowering::ZeroOrOneBooleanContent &&
7467       C1->isNullValue() && C2->isOne()) {
7468     SDValue NotCond =
7469         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7470     if (VT.bitsEq(CondVT))
7471       return NotCond;
7472     return DAG.getZExtOrTrunc(NotCond, DL, VT);
7473   }
7474
7475   return SDValue();
7476 }
7477
7478 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7479   SDValue N0 = N->getOperand(0);
7480   SDValue N1 = N->getOperand(1);
7481   SDValue N2 = N->getOperand(2);
7482   EVT VT = N->getValueType(0);
7483   EVT VT0 = N0.getValueType();
7484   SDLoc DL(N);
7485
7486   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7487     return V;
7488
7489   // fold (select X, X, Y) -> (or X, Y)
7490   // fold (select X, 1, Y) -> (or C, Y)
7491   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7492     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7493
7494   if (SDValue V = foldSelectOfConstants(N))
7495     return V;
7496
7497   // fold (select C, 0, X) -> (and (not C), X)
7498   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7499     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7500     AddToWorklist(NOTNode.getNode());
7501     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7502   }
7503   // fold (select C, X, 1) -> (or (not C), X)
7504   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7505     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7506     AddToWorklist(NOTNode.getNode());
7507     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7508   }
7509   // fold (select X, Y, X) -> (and X, Y)
7510   // fold (select X, Y, 0) -> (and X, Y)
7511   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7512     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7513
7514   // If we can fold this based on the true/false value, do so.
7515   if (SimplifySelectOps(N, N1, N2))
7516     return SDValue(N, 0); // Don't revisit N.
7517
7518   if (VT0 == MVT::i1) {
7519     // The code in this block deals with the following 2 equivalences:
7520     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7521     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7522     // The target can specify its preferred form with the
7523     // shouldNormalizeToSelectSequence() callback. However we always transform
7524     // to the right anyway if we find the inner select exists in the DAG anyway
7525     // and we always transform to the left side if we know that we can further
7526     // optimize the combination of the conditions.
7527     bool normalizeToSequence =
7528         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7529     // select (and Cond0, Cond1), X, Y
7530     //   -> select Cond0, (select Cond1, X, Y), Y
7531     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7532       SDValue Cond0 = N0->getOperand(0);
7533       SDValue Cond1 = N0->getOperand(1);
7534       SDValue InnerSelect =
7535           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7536       if (normalizeToSequence || !InnerSelect.use_empty())
7537         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7538                            InnerSelect, N2);
7539       // Cleanup on failure.
7540       if (InnerSelect.use_empty())
7541         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7542     }
7543     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7544     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7545       SDValue Cond0 = N0->getOperand(0);
7546       SDValue Cond1 = N0->getOperand(1);
7547       SDValue InnerSelect =
7548           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7549       if (normalizeToSequence || !InnerSelect.use_empty())
7550         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7551                            InnerSelect);
7552       // Cleanup on failure.
7553       if (InnerSelect.use_empty())
7554         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7555     }
7556
7557     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7558     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7559       SDValue N1_0 = N1->getOperand(0);
7560       SDValue N1_1 = N1->getOperand(1);
7561       SDValue N1_2 = N1->getOperand(2);
7562       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7563         // Create the actual and node if we can generate good code for it.
7564         if (!normalizeToSequence) {
7565           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7566           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7567         }
7568         // Otherwise see if we can optimize the "and" to a better pattern.
7569         if (SDValue Combined = visitANDLike(N0, N1_0, N))
7570           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7571                              N2);
7572       }
7573     }
7574     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7575     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7576       SDValue N2_0 = N2->getOperand(0);
7577       SDValue N2_1 = N2->getOperand(1);
7578       SDValue N2_2 = N2->getOperand(2);
7579       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7580         // Create the actual or node if we can generate good code for it.
7581         if (!normalizeToSequence) {
7582           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7583           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7584         }
7585         // Otherwise see if we can optimize to a better pattern.
7586         if (SDValue Combined = visitORLike(N0, N2_0, N))
7587           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7588                              N2_2);
7589       }
7590     }
7591   }
7592
7593   if (VT0 == MVT::i1) {
7594     // select (not Cond), N1, N2 -> select Cond, N2, N1
7595     if (isBitwiseNot(N0))
7596       return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7597   }
7598
7599   // Fold selects based on a setcc into other things, such as min/max/abs.
7600   if (N0.getOpcode() == ISD::SETCC) {
7601     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7602     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7603
7604     // select (fcmp lt x, y), x, y -> fminnum x, y
7605     // select (fcmp gt x, y), x, y -> fmaxnum x, y
7606     //
7607     // This is OK if we don't care what happens if either operand is a NaN.
7608     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
7609       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7610                                                 CC, TLI, DAG))
7611         return FMinMax;
7612
7613     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7614     // This is conservatively limited to pre-legal-operations to give targets
7615     // a chance to reverse the transform if they want to do that. Also, it is
7616     // unlikely that the pattern would be formed late, so it's probably not
7617     // worth going through the other checks.
7618     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7619         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7620         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7621       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7622       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7623       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7624         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7625         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7626         //
7627         // The IR equivalent of this transform would have this form:
7628         //   %a = add %x, C
7629         //   %c = icmp ugt %x, ~C
7630         //   %r = select %c, -1, %a
7631         //   =>
7632         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7633         //   %u0 = extractvalue %u, 0
7634         //   %u1 = extractvalue %u, 1
7635         //   %r = select %u1, -1, %u0
7636         SDVTList VTs = DAG.getVTList(VT, VT0);
7637         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7638         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7639       }
7640     }
7641
7642     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7643         (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7644       return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7645                          N0.getOperand(2));
7646
7647     return SimplifySelect(DL, N0, N1, N2);
7648   }
7649
7650   return SDValue();
7651 }
7652
7653 static
7654 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7655   SDLoc DL(N);
7656   EVT LoVT, HiVT;
7657   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7658
7659   // Split the inputs.
7660   SDValue Lo, Hi, LL, LH, RL, RH;
7661   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7662   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7663
7664   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7665   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7666
7667   return std::make_pair(Lo, Hi);
7668 }
7669
7670 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7671 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7672 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7673   SDLoc DL(N);
7674   SDValue Cond = N->getOperand(0);
7675   SDValue LHS = N->getOperand(1);
7676   SDValue RHS = N->getOperand(2);
7677   EVT VT = N->getValueType(0);
7678   int NumElems = VT.getVectorNumElements();
7679   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7680          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7681          Cond.getOpcode() == ISD::BUILD_VECTOR);
7682
7683   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7684   // binary ones here.
7685   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7686     return SDValue();
7687
7688   // We're sure we have an even number of elements due to the
7689   // concat_vectors we have as arguments to vselect.
7690   // Skip BV elements until we find one that's not an UNDEF
7691   // After we find an UNDEF element, keep looping until we get to half the
7692   // length of the BV and see if all the non-undef nodes are the same.
7693   ConstantSDNode *BottomHalf = nullptr;
7694   for (int i = 0; i < NumElems / 2; ++i) {
7695     if (Cond->getOperand(i)->isUndef())
7696       continue;
7697
7698     if (BottomHalf == nullptr)
7699       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7700     else if (Cond->getOperand(i).getNode() != BottomHalf)
7701       return SDValue();
7702   }
7703
7704   // Do the same for the second half of the BuildVector
7705   ConstantSDNode *TopHalf = nullptr;
7706   for (int i = NumElems / 2; i < NumElems; ++i) {
7707     if (Cond->getOperand(i)->isUndef())
7708       continue;
7709
7710     if (TopHalf == nullptr)
7711       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7712     else if (Cond->getOperand(i).getNode() != TopHalf)
7713       return SDValue();
7714   }
7715
7716   assert(TopHalf && BottomHalf &&
7717          "One half of the selector was all UNDEFs and the other was all the "
7718          "same value. This should have been addressed before this function.");
7719   return DAG.getNode(
7720       ISD::CONCAT_VECTORS, DL, VT,
7721       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7722       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7723 }
7724
7725 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7726   if (Level >= AfterLegalizeTypes)
7727     return SDValue();
7728
7729   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7730   SDValue Mask = MSC->getMask();
7731   SDValue Data  = MSC->getValue();
7732   SDLoc DL(N);
7733
7734   // If the MSCATTER data type requires splitting and the mask is provided by a
7735   // SETCC, then split both nodes and its operands before legalization. This
7736   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7737   // and enables future optimizations (e.g. min/max pattern matching on X86).
7738   if (Mask.getOpcode() != ISD::SETCC)
7739     return SDValue();
7740
7741   // Check if any splitting is required.
7742   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7743       TargetLowering::TypeSplitVector)
7744     return SDValue();
7745   SDValue MaskLo, MaskHi;
7746   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7747
7748   EVT LoVT, HiVT;
7749   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7750
7751   SDValue Chain = MSC->getChain();
7752
7753   EVT MemoryVT = MSC->getMemoryVT();
7754   unsigned Alignment = MSC->getOriginalAlignment();
7755
7756   EVT LoMemVT, HiMemVT;
7757   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7758
7759   SDValue DataLo, DataHi;
7760   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7761
7762   SDValue Scale = MSC->getScale();
7763   SDValue BasePtr = MSC->getBasePtr();
7764   SDValue IndexLo, IndexHi;
7765   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7766
7767   MachineMemOperand *MMO = DAG.getMachineFunction().
7768     getMachineMemOperand(MSC->getPointerInfo(),
7769                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7770                           Alignment, MSC->getAAInfo(), MSC->getRanges());
7771
7772   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7773   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7774                                     DataLo.getValueType(), DL, OpsLo, MMO);
7775
7776   // The order of the Scatter operation after split is well defined. The "Hi"
7777   // part comes after the "Lo". So these two operations should be chained one
7778   // after another.
7779   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7780   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7781                               DL, OpsHi, MMO);
7782 }
7783
7784 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7785   if (Level >= AfterLegalizeTypes)
7786     return SDValue();
7787
7788   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7789   SDValue Mask = MST->getMask();
7790   SDValue Data  = MST->getValue();
7791   EVT VT = Data.getValueType();
7792   SDLoc DL(N);
7793
7794   // If the MSTORE data type requires splitting and the mask is provided by a
7795   // SETCC, then split both nodes and its operands before legalization. This
7796   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7797   // and enables future optimizations (e.g. min/max pattern matching on X86).
7798   if (Mask.getOpcode() == ISD::SETCC) {
7799     // Check if any splitting is required.
7800     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7801         TargetLowering::TypeSplitVector)
7802       return SDValue();
7803
7804     SDValue MaskLo, MaskHi, Lo, Hi;
7805     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7806
7807     SDValue Chain = MST->getChain();
7808     SDValue Ptr   = MST->getBasePtr();
7809
7810     EVT MemoryVT = MST->getMemoryVT();
7811     unsigned Alignment = MST->getOriginalAlignment();
7812
7813     // if Alignment is equal to the vector size,
7814     // take the half of it for the second part
7815     unsigned SecondHalfAlignment =
7816       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7817
7818     EVT LoMemVT, HiMemVT;
7819     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7820
7821     SDValue DataLo, DataHi;
7822     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7823
7824     MachineMemOperand *MMO = DAG.getMachineFunction().
7825       getMachineMemOperand(MST->getPointerInfo(),
7826                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7827                            Alignment, MST->getAAInfo(), MST->getRanges());
7828
7829     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7830                             MST->isTruncatingStore(),
7831                             MST->isCompressingStore());
7832
7833     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7834                                      MST->isCompressingStore());
7835     unsigned HiOffset = LoMemVT.getStoreSize();
7836
7837     MMO = DAG.getMachineFunction().getMachineMemOperand(
7838         MST->getPointerInfo().getWithOffset(HiOffset),
7839         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7840         MST->getAAInfo(), MST->getRanges());
7841
7842     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7843                             MST->isTruncatingStore(),
7844                             MST->isCompressingStore());
7845
7846     AddToWorklist(Lo.getNode());
7847     AddToWorklist(Hi.getNode());
7848
7849     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7850   }
7851   return SDValue();
7852 }
7853
7854 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7855   if (Level >= AfterLegalizeTypes)
7856     return SDValue();
7857
7858   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7859   SDValue Mask = MGT->getMask();
7860   SDLoc DL(N);
7861
7862   // If the MGATHER result requires splitting and the mask is provided by a
7863   // SETCC, then split both nodes and its operands before legalization. This
7864   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7865   // and enables future optimizations (e.g. min/max pattern matching on X86).
7866
7867   if (Mask.getOpcode() != ISD::SETCC)
7868     return SDValue();
7869
7870   EVT VT = N->getValueType(0);
7871
7872   // Check if any splitting is required.
7873   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7874       TargetLowering::TypeSplitVector)
7875     return SDValue();
7876
7877   SDValue MaskLo, MaskHi, Lo, Hi;
7878   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7879
7880   SDValue PassThru = MGT->getPassThru();
7881   SDValue PassThruLo, PassThruHi;
7882   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7883
7884   EVT LoVT, HiVT;
7885   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7886
7887   SDValue Chain = MGT->getChain();
7888   EVT MemoryVT = MGT->getMemoryVT();
7889   unsigned Alignment = MGT->getOriginalAlignment();
7890
7891   EVT LoMemVT, HiMemVT;
7892   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7893
7894   SDValue Scale = MGT->getScale();
7895   SDValue BasePtr = MGT->getBasePtr();
7896   SDValue Index = MGT->getIndex();
7897   SDValue IndexLo, IndexHi;
7898   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7899
7900   MachineMemOperand *MMO = DAG.getMachineFunction().
7901     getMachineMemOperand(MGT->getPointerInfo(),
7902                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7903                           Alignment, MGT->getAAInfo(), MGT->getRanges());
7904
7905   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7906   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7907                            MMO);
7908
7909   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7910   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7911                            MMO);
7912
7913   AddToWorklist(Lo.getNode());
7914   AddToWorklist(Hi.getNode());
7915
7916   // Build a factor node to remember that this load is independent of the
7917   // other one.
7918   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7919                       Hi.getValue(1));
7920
7921   // Legalized the chain result - switch anything that used the old chain to
7922   // use the new one.
7923   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7924
7925   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7926
7927   SDValue RetOps[] = { GatherRes, Chain };
7928   return DAG.getMergeValues(RetOps, DL);
7929 }
7930
7931 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7932   if (Level >= AfterLegalizeTypes)
7933     return SDValue();
7934
7935   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7936   SDValue Mask = MLD->getMask();
7937   SDLoc DL(N);
7938
7939   // If the MLOAD result requires splitting and the mask is provided by a
7940   // SETCC, then split both nodes and its operands before legalization. This
7941   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7942   // and enables future optimizations (e.g. min/max pattern matching on X86).
7943   if (Mask.getOpcode() == ISD::SETCC) {
7944     EVT VT = N->getValueType(0);
7945
7946     // Check if any splitting is required.
7947     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7948         TargetLowering::TypeSplitVector)
7949       return SDValue();
7950
7951     SDValue MaskLo, MaskHi, Lo, Hi;
7952     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7953
7954     SDValue PassThru = MLD->getPassThru();
7955     SDValue PassThruLo, PassThruHi;
7956     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7957
7958     EVT LoVT, HiVT;
7959     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7960
7961     SDValue Chain = MLD->getChain();
7962     SDValue Ptr   = MLD->getBasePtr();
7963     EVT MemoryVT = MLD->getMemoryVT();
7964     unsigned Alignment = MLD->getOriginalAlignment();
7965
7966     // if Alignment is equal to the vector size,
7967     // take the half of it for the second part
7968     unsigned SecondHalfAlignment =
7969       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7970          Alignment/2 : Alignment;
7971
7972     EVT LoMemVT, HiMemVT;
7973     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7974
7975     MachineMemOperand *MMO = DAG.getMachineFunction().
7976     getMachineMemOperand(MLD->getPointerInfo(),
7977                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7978                          Alignment, MLD->getAAInfo(), MLD->getRanges());
7979
7980     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
7981                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7982
7983     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7984                                      MLD->isExpandingLoad());
7985     unsigned HiOffset = LoMemVT.getStoreSize();
7986
7987     MMO = DAG.getMachineFunction().getMachineMemOperand(
7988         MLD->getPointerInfo().getWithOffset(HiOffset),
7989         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7990         MLD->getAAInfo(), MLD->getRanges());
7991
7992     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
7993                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7994
7995     AddToWorklist(Lo.getNode());
7996     AddToWorklist(Hi.getNode());
7997
7998     // Build a factor node to remember that this load is independent of the
7999     // other one.
8000     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8001                         Hi.getValue(1));
8002
8003     // Legalized the chain result - switch anything that used the old chain to
8004     // use the new one.
8005     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8006
8007     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8008
8009     SDValue RetOps[] = { LoadRes, Chain };
8010     return DAG.getMergeValues(RetOps, DL);
8011   }
8012   return SDValue();
8013 }
8014
8015 /// A vector select of 2 constant vectors can be simplified to math/logic to
8016 /// avoid a variable select instruction and possibly avoid constant loads.
8017 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8018   SDValue Cond = N->getOperand(0);
8019   SDValue N1 = N->getOperand(1);
8020   SDValue N2 = N->getOperand(2);
8021   EVT VT = N->getValueType(0);
8022   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8023       !TLI.convertSelectOfConstantsToMath(VT) ||
8024       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8025       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8026     return SDValue();
8027
8028   // Check if we can use the condition value to increment/decrement a single
8029   // constant value. This simplifies a select to an add and removes a constant
8030   // load/materialization from the general case.
8031   bool AllAddOne = true;
8032   bool AllSubOne = true;
8033   unsigned Elts = VT.getVectorNumElements();
8034   for (unsigned i = 0; i != Elts; ++i) {
8035     SDValue N1Elt = N1.getOperand(i);
8036     SDValue N2Elt = N2.getOperand(i);
8037     if (N1Elt.isUndef() || N2Elt.isUndef())
8038       continue;
8039
8040     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8041     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8042     if (C1 != C2 + 1)
8043       AllAddOne = false;
8044     if (C1 != C2 - 1)
8045       AllSubOne = false;
8046   }
8047
8048   // Further simplifications for the extra-special cases where the constants are
8049   // all 0 or all -1 should be implemented as folds of these patterns.
8050   SDLoc DL(N);
8051   if (AllAddOne || AllSubOne) {
8052     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8053     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8054     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8055     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8056     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8057   }
8058
8059   // The general case for select-of-constants:
8060   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8061   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8062   // leave that to a machine-specific pass.
8063   return SDValue();
8064 }
8065
8066 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8067   SDValue N0 = N->getOperand(0);
8068   SDValue N1 = N->getOperand(1);
8069   SDValue N2 = N->getOperand(2);
8070   SDLoc DL(N);
8071
8072   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8073     return V;
8074
8075   // Canonicalize integer abs.
8076   // vselect (setg[te] X,  0),  X, -X ->
8077   // vselect (setgt    X, -1),  X, -X ->
8078   // vselect (setl[te] X,  0), -X,  X ->
8079   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8080   if (N0.getOpcode() == ISD::SETCC) {
8081     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8082     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8083     bool isAbs = false;
8084     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8085
8086     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8087          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8088         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8089       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8090     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8091              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8092       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8093
8094     if (isAbs) {
8095       EVT VT = LHS.getValueType();
8096       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8097         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8098
8099       SDValue Shift = DAG.getNode(
8100           ISD::SRA, DL, VT, LHS,
8101           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8102       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8103       AddToWorklist(Shift.getNode());
8104       AddToWorklist(Add.getNode());
8105       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8106     }
8107
8108     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8109     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8110     //
8111     // This is OK if we don't care about what happens if either operand is a
8112     // NaN.
8113     //
8114     EVT VT = N->getValueType(0);
8115     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(
8116                               DAG, N0.getOperand(0), N0.getOperand(1), TLI)) {
8117       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8118       if (SDValue FMinMax = combineMinNumMaxNum(
8119             DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8120         return FMinMax;
8121     }
8122
8123     // If this select has a condition (setcc) with narrower operands than the
8124     // select, try to widen the compare to match the select width.
8125     // TODO: This should be extended to handle any constant.
8126     // TODO: This could be extended to handle non-loading patterns, but that
8127     //       requires thorough testing to avoid regressions.
8128     if (isNullOrNullSplat(RHS)) {
8129       EVT NarrowVT = LHS.getValueType();
8130       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8131       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8132       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8133       unsigned WideWidth = WideVT.getScalarSizeInBits();
8134       bool IsSigned = isSignedIntSetCC(CC);
8135       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8136       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8137           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8138           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8139           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8140         // Both compare operands can be widened for free. The LHS can use an
8141         // extended load, and the RHS is a constant:
8142         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8143         //   vselect (setcc extload(X), C'), N1, N2
8144         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8145         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8146         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8147         EVT WideSetCCVT = getSetCCResultType(WideVT);
8148         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8149         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8150       }
8151     }
8152   }
8153
8154   if (SimplifySelectOps(N, N1, N2))
8155     return SDValue(N, 0);  // Don't revisit N.
8156
8157   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8158   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8159     return N1;
8160   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8161   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8162     return N2;
8163
8164   // The ConvertSelectToConcatVector function is assuming both the above
8165   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8166   // and addressed.
8167   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8168       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8169       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8170     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8171       return CV;
8172   }
8173
8174   if (SDValue V = foldVSelectOfConstants(N))
8175     return V;
8176
8177   return SDValue();
8178 }
8179
8180 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8181   SDValue N0 = N->getOperand(0);
8182   SDValue N1 = N->getOperand(1);
8183   SDValue N2 = N->getOperand(2);
8184   SDValue N3 = N->getOperand(3);
8185   SDValue N4 = N->getOperand(4);
8186   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8187
8188   // fold select_cc lhs, rhs, x, x, cc -> x
8189   if (N2 == N3)
8190     return N2;
8191
8192   // Determine if the condition we're dealing with is constant
8193   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8194                                   CC, SDLoc(N), false)) {
8195     AddToWorklist(SCC.getNode());
8196
8197     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8198       if (!SCCC->isNullValue())
8199         return N2;    // cond always true -> true val
8200       else
8201         return N3;    // cond always false -> false val
8202     } else if (SCC->isUndef()) {
8203       // When the condition is UNDEF, just return the first operand. This is
8204       // coherent the DAG creation, no setcc node is created in this case
8205       return N2;
8206     } else if (SCC.getOpcode() == ISD::SETCC) {
8207       // Fold to a simpler select_cc
8208       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8209                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8210                          SCC.getOperand(2));
8211     }
8212   }
8213
8214   // If we can fold this based on the true/false value, do so.
8215   if (SimplifySelectOps(N, N2, N3))
8216     return SDValue(N, 0);  // Don't revisit N.
8217
8218   // fold select_cc into other things, such as min/max/abs
8219   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8220 }
8221
8222 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8223   // setcc is very commonly used as an argument to brcond. This pattern
8224   // also lend itself to numerous combines and, as a result, it is desired
8225   // we keep the argument to a brcond as a setcc as much as possible.
8226   bool PreferSetCC =
8227       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8228
8229   SDValue Combined = SimplifySetCC(
8230       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8231       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8232
8233   if (!Combined)
8234     return SDValue();
8235
8236   // If we prefer to have a setcc, and we don't, we'll try our best to
8237   // recreate one using rebuildSetCC.
8238   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8239     SDValue NewSetCC = rebuildSetCC(Combined);
8240
8241     // We don't have anything interesting to combine to.
8242     if (NewSetCC.getNode() == N)
8243       return SDValue();
8244
8245     if (NewSetCC)
8246       return NewSetCC;
8247   }
8248
8249   return Combined;
8250 }
8251
8252 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8253   SDValue LHS = N->getOperand(0);
8254   SDValue RHS = N->getOperand(1);
8255   SDValue Carry = N->getOperand(2);
8256   SDValue Cond = N->getOperand(3);
8257
8258   // If Carry is false, fold to a regular SETCC.
8259   if (isNullConstant(Carry))
8260     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8261
8262   return SDValue();
8263 }
8264
8265 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8266 /// a build_vector of constants.
8267 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8268 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8269 /// Vector extends are not folded if operations are legal; this is to
8270 /// avoid introducing illegal build_vector dag nodes.
8271 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8272                                          SelectionDAG &DAG, bool LegalTypes) {
8273   unsigned Opcode = N->getOpcode();
8274   SDValue N0 = N->getOperand(0);
8275   EVT VT = N->getValueType(0);
8276
8277   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8278          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8279          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8280          && "Expected EXTEND dag node in input!");
8281
8282   // fold (sext c1) -> c1
8283   // fold (zext c1) -> c1
8284   // fold (aext c1) -> c1
8285   if (isa<ConstantSDNode>(N0))
8286     return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8287
8288   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8289   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8290   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8291   EVT SVT = VT.getScalarType();
8292   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8293       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8294     return SDValue();
8295
8296   // We can fold this node into a build_vector.
8297   unsigned VTBits = SVT.getSizeInBits();
8298   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8299   SmallVector<SDValue, 8> Elts;
8300   unsigned NumElts = VT.getVectorNumElements();
8301   SDLoc DL(N);
8302
8303   // For zero-extensions, UNDEF elements still guarantee to have the upper
8304   // bits set to zero.
8305   bool IsZext =
8306       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8307
8308   for (unsigned i = 0; i != NumElts; ++i) {
8309     SDValue Op = N0.getOperand(i);
8310     if (Op.isUndef()) {
8311       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8312       continue;
8313     }
8314
8315     SDLoc DL(Op);
8316     // Get the constant value and if needed trunc it to the size of the type.
8317     // Nodes like build_vector might have constants wider than the scalar type.
8318     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8319     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8320       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8321     else
8322       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8323   }
8324
8325   return DAG.getBuildVector(VT, DL, Elts);
8326 }
8327
8328 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8329 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8330 // transformation. Returns true if extension are possible and the above
8331 // mentioned transformation is profitable.
8332 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8333                                     unsigned ExtOpc,
8334                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8335                                     const TargetLowering &TLI) {
8336   bool HasCopyToRegUses = false;
8337   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8338   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8339                             UE = N0.getNode()->use_end();
8340        UI != UE; ++UI) {
8341     SDNode *User = *UI;
8342     if (User == N)
8343       continue;
8344     if (UI.getUse().getResNo() != N0.getResNo())
8345       continue;
8346     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8347     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8348       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8349       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8350         // Sign bits will be lost after a zext.
8351         return false;
8352       bool Add = false;
8353       for (unsigned i = 0; i != 2; ++i) {
8354         SDValue UseOp = User->getOperand(i);
8355         if (UseOp == N0)
8356           continue;
8357         if (!isa<ConstantSDNode>(UseOp))
8358           return false;
8359         Add = true;
8360       }
8361       if (Add)
8362         ExtendNodes.push_back(User);
8363       continue;
8364     }
8365     // If truncates aren't free and there are users we can't
8366     // extend, it isn't worthwhile.
8367     if (!isTruncFree)
8368       return false;
8369     // Remember if this value is live-out.
8370     if (User->getOpcode() == ISD::CopyToReg)
8371       HasCopyToRegUses = true;
8372   }
8373
8374   if (HasCopyToRegUses) {
8375     bool BothLiveOut = false;
8376     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8377          UI != UE; ++UI) {
8378       SDUse &Use = UI.getUse();
8379       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8380         BothLiveOut = true;
8381         break;
8382       }
8383     }
8384     if (BothLiveOut)
8385       // Both unextended and extended values are live out. There had better be
8386       // a good reason for the transformation.
8387       return ExtendNodes.size();
8388   }
8389   return true;
8390 }
8391
8392 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8393                                   SDValue OrigLoad, SDValue ExtLoad,
8394                                   ISD::NodeType ExtType) {
8395   // Extend SetCC uses if necessary.
8396   SDLoc DL(ExtLoad);
8397   for (SDNode *SetCC : SetCCs) {
8398     SmallVector<SDValue, 4> Ops;
8399
8400     for (unsigned j = 0; j != 2; ++j) {
8401       SDValue SOp = SetCC->getOperand(j);
8402       if (SOp == OrigLoad)
8403         Ops.push_back(ExtLoad);
8404       else
8405         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8406     }
8407
8408     Ops.push_back(SetCC->getOperand(2));
8409     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8410   }
8411 }
8412
// Split a sign/zero extension of a vector load into several narrower
// extending loads whose results are concatenated.
//
// N must be a SIGN_EXTEND or ZERO_EXTEND node (asserted). On success both N
// and the original load are replaced via CombineTo and SDValue(N, 0) is
// returned; otherwise an empty SDValue is returned and the DAG is unchanged.
//
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 8)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 8)))))))
  // (Each piece is offset by the store size of the split source type, which
  // is 8 bytes for the v4i16 memory type in this example.)
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Require a plain (non-extending, unindexed, non-volatile) load whose value
  // is used only by N, a power-of-two vector result type, and the target's
  // agreement that a vector extending load is desirable here.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // NOTE(review): N0 was required to have a single use just above, so this
  // looks like it always succeeds with SetCCs left empty - confirm.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Both types are replaced in lockstep by the first type GetSplitDestVTs
  // returns, until the extending load is legal/custom or the source type is
  // down to a single element.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  // Splitting bottomed out without reaching a usable extending load.
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  // Number of pieces needed to cover the full destination vector, and the
  // distance in memory between consecutive pieces.
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of this piece, derived from the original load's alignment and
    // the piece's offset.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    // Every piece hangs off the original load's incoming chain.
    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    // Advance the address for the next piece. This is also done after the
    // last piece, where the resulting node ends up unused.
    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the output chains of all pieces and concatenate their values into
  // the full-width result.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  // The load's value result becomes the truncate and its chain result becomes
  // the merged chain.
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
8509
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
//
// N must be a ZERO_EXTEND node (asserted). Returns SDValue(N, 0) after
// rewriting the DAG, or an empty SDValue when the pattern does not match or
// the rewrite is rejected.
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // Bail out when the target reports this zero-extension as free
  // (presumably there is then nothing to gain from folding it away).
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  // The logic op must have a constant second operand and, once operations
  // are being legalized, must be legal in the wide type.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // Same requirements for the shift feeding the logic op: constant shift
  // amount and legality in the wide type. Only SHL and SRL are accepted.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be an unindexed load that is not already
  // sign-extending, and a zero-extending load to VT must be legal.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  // (In the wide type a SHL keeps bits that the narrow shift would have
  // discarded; the AND with the zero-extended mask below clears them again,
  // whereas OR/XOR would let them through.)
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // The logic op and the shift are rewritten in place, so neither may have
  // other users.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Check the load's remaining users (N1 is the one being rewritten) and
  // collect the SETCC users that have to be widened along with it.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Redo the shift in the wide type, reusing the original shift amount.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Redo the logic op in the wide type with the constant zero-extended.
  // Despite its name, 'And' holds whichever of and/or/xor N0 is.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  // This use count is deliberately taken after the rewrites above. If the
  // load value has a single remaining use, only its chain is redirected to
  // the new load; otherwise its remaining users get a truncate of the
  // extended value.
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
8587
8588 /// If we're narrowing or widening the result of a vector select and the final
8589 /// size is the same size as a setcc (compare) feeding the select, then try to
8590 /// apply the cast operation to the select's operands because matching vector
8591 /// sizes for a select condition and other operands should be more efficient.
8592 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8593   unsigned CastOpcode = Cast->getOpcode();
8594   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8595           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8596           CastOpcode == ISD::FP_ROUND) &&
8597          "Unexpected opcode for vector select narrowing/widening");
8598
8599   // We only do this transform before legal ops because the pattern may be
8600   // obfuscated by target-specific operations after legalization. Do not create
8601   // an illegal select op, however, because that may be difficult to lower.
8602   EVT VT = Cast->getValueType(0);
8603   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8604     return SDValue();
8605
8606   SDValue VSel = Cast->getOperand(0);
8607   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8608       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8609     return SDValue();
8610
8611   // Does the setcc have the same vector size as the casted select?
8612   SDValue SetCC = VSel.getOperand(0);
8613   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8614   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8615     return SDValue();
8616
8617   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8618   SDValue A = VSel.getOperand(1);
8619   SDValue B = VSel.getOperand(2);
8620   SDValue CastA, CastB;
8621   SDLoc DL(Cast);
8622   if (CastOpcode == ISD::FP_ROUND) {
8623     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8624     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8625     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8626   } else {
8627     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8628     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8629   }
8630   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8631 }
8632
8633 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8634 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8635 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8636                                      const TargetLowering &TLI, EVT VT,
8637                                      bool LegalOperations, SDNode *N,
8638                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8639   SDNode *N0Node = N0.getNode();
8640   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8641                                                    : ISD::isZEXTLoad(N0Node);
8642   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8643       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8644     return SDValue();
8645
8646   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8647   EVT MemVT = LN0->getMemoryVT();
8648   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8649       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8650     return SDValue();
8651
8652   SDValue ExtLoad =
8653       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8654                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8655   Combiner.CombineTo(N, ExtLoad);
8656   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8657   if (LN0->use_empty())
8658     Combiner.recursivelyDeleteUnusedNodes(LN0);
8659   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8660 }
8661
8662 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8663 // Only generate vector extloads when 1) they're legal, and 2) they are
8664 // deemed desirable by the target.
8665 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8666                                   const TargetLowering &TLI, EVT VT,
8667                                   bool LegalOperations, SDNode *N, SDValue N0,
8668                                   ISD::LoadExtType ExtLoadType,
8669                                   ISD::NodeType ExtOpc) {
8670   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8671       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8672       ((LegalOperations || VT.isVector() ||
8673         cast<LoadSDNode>(N0)->isVolatile()) &&
8674        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8675     return {};
8676
8677   bool DoXform = true;
8678   SmallVector<SDNode *, 4> SetCCs;
8679   if (!N0.hasOneUse())
8680     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8681   if (VT.isVector())
8682     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8683   if (!DoXform)
8684     return {};
8685
8686   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8687   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8688                                    LN0->getBasePtr(), N0.getValueType(),
8689                                    LN0->getMemOperand());
8690   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8691   // If the load value is used only by N, replace it via CombineTo N.
8692   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8693   Combiner.CombineTo(N, ExtLoad);
8694   if (NoReplaceTrunc) {
8695     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8696     Combiner.recursivelyDeleteUnusedNodes(LN0);
8697   } else {
8698     SDValue Trunc =
8699         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8700     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8701   }
8702   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8703 }
8704
8705 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8706                                        bool LegalOperations) {
8707   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8708           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8709
8710   SDValue SetCC = N->getOperand(0);
8711   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8712       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8713     return SDValue();
8714
8715   SDValue X = SetCC.getOperand(0);
8716   SDValue Ones = SetCC.getOperand(1);
8717   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8718   EVT VT = N->getValueType(0);
8719   EVT XVT = X.getValueType();
8720   // setge X, C is canonicalized to setgt, so we do not need to match that
8721   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8722   // not require the 'not' op.
8723   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8724     // Invert and smear/shift the sign bit:
8725     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8726     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8727     SDLoc DL(N);
8728     SDValue NotX = DAG.getNOT(DL, X, VT);
8729     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8730     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8731     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8732   }
8733   return SDValue();
8734 }
8735
8736 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8737   SDValue N0 = N->getOperand(0);
8738   EVT VT = N->getValueType(0);
8739   SDLoc DL(N);
8740
8741   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8742     return Res;
8743
8744   // fold (sext (sext x)) -> (sext x)
8745   // fold (sext (aext x)) -> (sext x)
8746   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8747     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8748
8749   if (N0.getOpcode() == ISD::TRUNCATE) {
8750     // fold (sext (truncate (load x))) -> (sext (smaller load x))
8751     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8752     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8753       SDNode *oye = N0.getOperand(0).getNode();
8754       if (NarrowLoad.getNode() != N0.getNode()) {
8755         CombineTo(N0.getNode(), NarrowLoad);
8756         // CombineTo deleted the truncate, if needed, but not what's under it.
8757         AddToWorklist(oye);
8758       }
8759       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8760     }
8761
8762     // See if the value being truncated is already sign extended.  If so, just
8763     // eliminate the trunc/sext pair.
8764     SDValue Op = N0.getOperand(0);
8765     unsigned OpBits   = Op.getScalarValueSizeInBits();
8766     unsigned MidBits  = N0.getScalarValueSizeInBits();
8767     unsigned DestBits = VT.getScalarSizeInBits();
8768     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8769
8770     if (OpBits == DestBits) {
8771       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
8772       // bits, it is already ready.
8773       if (NumSignBits > DestBits-MidBits)
8774         return Op;
8775     } else if (OpBits < DestBits) {
8776       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
8777       // bits, just sext from i32.
8778       if (NumSignBits > OpBits-MidBits)
8779         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8780     } else {
8781       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
8782       // bits, just truncate to i32.
8783       if (NumSignBits > OpBits-MidBits)
8784         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8785     }
8786
8787     // fold (sext (truncate x)) -> (sextinreg x).
8788     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8789                                                  N0.getValueType())) {
8790       if (OpBits < DestBits)
8791         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8792       else if (OpBits > DestBits)
8793         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8794       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8795                          DAG.getValueType(N0.getValueType()));
8796     }
8797   }
8798
8799   // Try to simplify (sext (load x)).
8800   if (SDValue foldedExt =
8801           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8802                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8803     return foldedExt;
8804
8805   // fold (sext (load x)) to multiple smaller sextloads.
8806   // Only on illegal but splittable vectors.
8807   if (SDValue ExtLoad = CombineExtLoad(N))
8808     return ExtLoad;
8809
8810   // Try to simplify (sext (sextload x)).
8811   if (SDValue foldedExt = tryToFoldExtOfExtload(
8812           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8813     return foldedExt;
8814
8815   // fold (sext (and/or/xor (load x), cst)) ->
8816   //      (and/or/xor (sextload x), (sext cst))
8817   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8818        N0.getOpcode() == ISD::XOR) &&
8819       isa<LoadSDNode>(N0.getOperand(0)) &&
8820       N0.getOperand(1).getOpcode() == ISD::Constant &&
8821       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8822     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8823     EVT MemVT = LN00->getMemoryVT();
8824     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8825       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8826       SmallVector<SDNode*, 4> SetCCs;
8827       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8828                                              ISD::SIGN_EXTEND, SetCCs, TLI);
8829       if (DoXform) {
8830         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8831                                          LN00->getChain(), LN00->getBasePtr(),
8832                                          LN00->getMemoryVT(),
8833                                          LN00->getMemOperand());
8834         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8835         Mask = Mask.sext(VT.getSizeInBits());
8836         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8837                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8838         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8839         bool NoReplaceTruncAnd = !N0.hasOneUse();
8840         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8841         CombineTo(N, And);
8842         // If N0 has multiple uses, change other uses as well.
8843         if (NoReplaceTruncAnd) {
8844           SDValue TruncAnd =
8845               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8846           CombineTo(N0.getNode(), TruncAnd);
8847         }
8848         if (NoReplaceTrunc) {
8849           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8850         } else {
8851           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8852                                       LN00->getValueType(0), ExtLoad);
8853           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8854         }
8855         return SDValue(N,0); // Return N so it doesn't get rechecked!
8856       }
8857     }
8858   }
8859
8860   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8861     return V;
8862
8863   if (N0.getOpcode() == ISD::SETCC) {
8864     SDValue N00 = N0.getOperand(0);
8865     SDValue N01 = N0.getOperand(1);
8866     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8867     EVT N00VT = N0.getOperand(0).getValueType();
8868
8869     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8870     // Only do this before legalize for now.
8871     if (VT.isVector() && !LegalOperations &&
8872         TLI.getBooleanContents(N00VT) ==
8873             TargetLowering::ZeroOrNegativeOneBooleanContent) {
8874       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8875       // of the same size as the compared operands. Only optimize sext(setcc())
8876       // if this is the case.
8877       EVT SVT = getSetCCResultType(N00VT);
8878
8879       // If we already have the desired type, don't change it.
8880       if (SVT != N0.getValueType()) {
8881         // We know that the # elements of the results is the same as the
8882         // # elements of the compare (and the # elements of the compare result
8883         // for that matter).  Check to see that they are the same size.  If so,
8884         // we know that the element size of the sext'd result matches the
8885         // element size of the compare operands.
8886         if (VT.getSizeInBits() == SVT.getSizeInBits())
8887           return DAG.getSetCC(DL, VT, N00, N01, CC);
8888
8889         // If the desired elements are smaller or larger than the source
8890         // elements, we can use a matching integer vector type and then
8891         // truncate/sign extend.
8892         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8893         if (SVT == MatchingVecType) {
8894           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8895           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8896         }
8897       }
8898     }
8899
8900     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8901     // Here, T can be 1 or -1, depending on the type of the setcc and
8902     // getBooleanContents().
8903     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8904
8905     // To determine the "true" side of the select, we need to know the high bit
8906     // of the value returned by the setcc if it evaluates to true.
8907     // If the type of the setcc is i1, then the true case of the select is just
8908     // sext(i1 1), that is, -1.
8909     // If the type of the setcc is larger (say, i8) then the value of the high
8910     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8911     // of the appropriate width.
8912     SDValue ExtTrueVal = (SetCCWidth == 1)
8913                              ? DAG.getAllOnesConstant(DL, VT)
8914                              : DAG.getBoolConstant(true, DL, VT, N00VT);
8915     SDValue Zero = DAG.getConstant(0, DL, VT);
8916     if (SDValue SCC =
8917             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8918       return SCC;
8919
8920     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8921       EVT SetCCVT = getSetCCResultType(N00VT);
8922       // Don't do this transform for i1 because there's a select transform
8923       // that would reverse it.
8924       // TODO: We should not do this transform at all without a target hook
8925       // because a sext is likely cheaper than a select?
8926       if (SetCCVT.getScalarSizeInBits() != 1 &&
8927           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8928         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8929         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8930       }
8931     }
8932   }
8933
8934   // fold (sext x) -> (zext x) if the sign bit is known zero.
8935   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8936       DAG.SignBitIsZero(N0))
8937     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8938
8939   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8940     return NewVSel;
8941
8942   return SDValue();
8943 }
8944
8945 // isTruncateOf - If N is a truncate of some other value, return true, record
8946 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8947 // This function computes KnownBits to avoid a duplicated call to
8948 // computeKnownBits in the caller.
8949 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8950                          KnownBits &Known) {
8951   if (N->getOpcode() == ISD::TRUNCATE) {
8952     Op = N->getOperand(0);
8953     Known = DAG.computeKnownBits(Op);
8954     return true;
8955   }
8956
8957   if (N.getOpcode() != ISD::SETCC ||
8958       N.getValueType().getScalarType() != MVT::i1 ||
8959       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
8960     return false;
8961
8962   SDValue Op0 = N->getOperand(0);
8963   SDValue Op1 = N->getOperand(1);
8964   assert(Op0.getValueType() == Op1.getValueType());
8965
8966   if (isNullOrNullSplat(Op0))
8967     Op = Op1;
8968   else if (isNullOrNullSplat(Op1))
8969     Op = Op0;
8970   else
8971     return false;
8972
8973   Known = DAG.computeKnownBits(Op);
8974
8975   return (Known.Zero | 1).isAllOnesValue();
8976 }
8977
8978 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8979   SDValue N0 = N->getOperand(0);
8980   EVT VT = N->getValueType(0);
8981
8982   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8983     return Res;
8984
8985   // fold (zext (zext x)) -> (zext x)
8986   // fold (zext (aext x)) -> (zext x)
8987   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8988     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8989                        N0.getOperand(0));
8990
8991   // fold (zext (truncate x)) -> (zext x) or
8992   //      (zext (truncate x)) -> (truncate x)
8993   // This is valid when the truncated bits of x are already zero.
8994   SDValue Op;
8995   KnownBits Known;
8996   if (isTruncateOf(DAG, N0, Op, Known)) {
8997     APInt TruncatedBits =
8998       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
8999       APInt(Op.getScalarValueSizeInBits(), 0) :
9000       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9001                         N0.getScalarValueSizeInBits(),
9002                         std::min(Op.getScalarValueSizeInBits(),
9003                                  VT.getScalarSizeInBits()));
9004     if (TruncatedBits.isSubsetOf(Known.Zero))
9005       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9006   }
9007
9008   // fold (zext (truncate x)) -> (and x, mask)
9009   if (N0.getOpcode() == ISD::TRUNCATE) {
9010     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9011     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9012     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9013       SDNode *oye = N0.getOperand(0).getNode();
9014       if (NarrowLoad.getNode() != N0.getNode()) {
9015         CombineTo(N0.getNode(), NarrowLoad);
9016         // CombineTo deleted the truncate, if needed, but not what's under it.
9017         AddToWorklist(oye);
9018       }
9019       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9020     }
9021
9022     EVT SrcVT = N0.getOperand(0).getValueType();
9023     EVT MinVT = N0.getValueType();
9024
9025     // Try to mask before the extension to avoid having to generate a larger mask,
9026     // possibly over several sub-vectors.
9027     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9028       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9029                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9030         SDValue Op = N0.getOperand(0);
9031         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9032         AddToWorklist(Op.getNode());
9033         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9034         // Transfer the debug info; the new node is equivalent to N0.
9035         DAG.transferDbgValues(N0, ZExtOrTrunc);
9036         return ZExtOrTrunc;
9037       }
9038     }
9039
9040     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9041       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9042       AddToWorklist(Op.getNode());
9043       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9044       // We may safely transfer the debug info describing the truncate node over
9045       // to the equivalent and operation.
9046       DAG.transferDbgValues(N0, And);
9047       return And;
9048     }
9049   }
9050
9051   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9052   // if either of the casts is not free.
9053   if (N0.getOpcode() == ISD::AND &&
9054       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9055       N0.getOperand(1).getOpcode() == ISD::Constant &&
9056       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9057                            N0.getValueType()) ||
9058        !TLI.isZExtFree(N0.getValueType(), VT))) {
9059     SDValue X = N0.getOperand(0).getOperand(0);
9060     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9061     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9062     Mask = Mask.zext(VT.getSizeInBits());
9063     SDLoc DL(N);
9064     return DAG.getNode(ISD::AND, DL, VT,
9065                        X, DAG.getConstant(Mask, DL, VT));
9066   }
9067
9068   // Try to simplify (zext (load x)).
9069   if (SDValue foldedExt =
9070           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9071                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9072     return foldedExt;
9073
9074   // fold (zext (load x)) to multiple smaller zextloads.
9075   // Only on illegal but splittable vectors.
9076   if (SDValue ExtLoad = CombineExtLoad(N))
9077     return ExtLoad;
9078
9079   // fold (zext (and/or/xor (load x), cst)) ->
9080   //      (and/or/xor (zextload x), (zext cst))
9081   // Unless (and (load x) cst) will match as a zextload already and has
9082   // additional users.
9083   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9084        N0.getOpcode() == ISD::XOR) &&
9085       isa<LoadSDNode>(N0.getOperand(0)) &&
9086       N0.getOperand(1).getOpcode() == ISD::Constant &&
9087       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9088     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9089     EVT MemVT = LN00->getMemoryVT();
9090     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9091         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9092       bool DoXform = true;
9093       SmallVector<SDNode*, 4> SetCCs;
9094       if (!N0.hasOneUse()) {
9095         if (N0.getOpcode() == ISD::AND) {
9096           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9097           EVT LoadResultTy = AndC->getValueType(0);
9098           EVT ExtVT;
9099           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9100             DoXform = false;
9101         }
9102       }
9103       if (DoXform)
9104         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9105                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9106       if (DoXform) {
9107         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9108                                          LN00->getChain(), LN00->getBasePtr(),
9109                                          LN00->getMemoryVT(),
9110                                          LN00->getMemOperand());
9111         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9112         Mask = Mask.zext(VT.getSizeInBits());
9113         SDLoc DL(N);
9114         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9115                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9116         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9117         bool NoReplaceTruncAnd = !N0.hasOneUse();
9118         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9119         CombineTo(N, And);
9120         // If N0 has multiple uses, change other uses as well.
9121         if (NoReplaceTruncAnd) {
9122           SDValue TruncAnd =
9123               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9124           CombineTo(N0.getNode(), TruncAnd);
9125         }
9126         if (NoReplaceTrunc) {
9127           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9128         } else {
9129           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9130                                       LN00->getValueType(0), ExtLoad);
9131           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9132         }
9133         return SDValue(N,0); // Return N so it doesn't get rechecked!
9134       }
9135     }
9136   }
9137
9138   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9139   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9140   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9141     return ZExtLoad;
9142
9143   // Try to simplify (zext (zextload x)).
9144   if (SDValue foldedExt = tryToFoldExtOfExtload(
9145           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9146     return foldedExt;
9147
9148   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9149     return V;
9150
9151   if (N0.getOpcode() == ISD::SETCC) {
9152     // Only do this before legalize for now.
9153     if (!LegalOperations && VT.isVector() &&
9154         N0.getValueType().getVectorElementType() == MVT::i1) {
9155       EVT N00VT = N0.getOperand(0).getValueType();
9156       if (getSetCCResultType(N00VT) == N0.getValueType())
9157         return SDValue();
9158
9159       // We know that the # elements of the results is the same as the #
9160       // elements of the compare (and the # elements of the compare result for
9161       // that matter). Check to see that they are the same size. If so, we know
9162       // that the element size of the sext'd result matches the element size of
9163       // the compare operands.
9164       SDLoc DL(N);
9165       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9166       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9167         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9168         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9169                                      N0.getOperand(1), N0.getOperand(2));
9170         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9171       }
9172
9173       // If the desired elements are smaller or larger than the source
9174       // elements we can use a matching integer vector type and then
9175       // truncate/sign extend.
9176       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9177       SDValue VsetCC =
9178           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9179                       N0.getOperand(1), N0.getOperand(2));
9180       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9181                          VecOnes);
9182     }
9183
9184     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9185     SDLoc DL(N);
9186     if (SDValue SCC = SimplifySelectCC(
9187             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9188             DAG.getConstant(0, DL, VT),
9189             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9190       return SCC;
9191   }
9192
9193   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9194   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9195       isa<ConstantSDNode>(N0.getOperand(1)) &&
9196       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9197       N0.hasOneUse()) {
9198     SDValue ShAmt = N0.getOperand(1);
9199     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9200     if (N0.getOpcode() == ISD::SHL) {
9201       SDValue InnerZExt = N0.getOperand(0);
9202       // If the original shl may be shifting out bits, do not perform this
9203       // transformation.
9204       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9205         InnerZExt.getOperand(0).getValueSizeInBits();
9206       if (ShAmtVal > KnownZeroBits)
9207         return SDValue();
9208     }
9209
9210     SDLoc DL(N);
9211
9212     // Ensure that the shift amount is wide enough for the shifted value.
9213     if (VT.getSizeInBits() >= 256)
9214       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9215
9216     return DAG.getNode(N0.getOpcode(), DL, VT,
9217                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9218                        ShAmt);
9219   }
9220
9221   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9222     return NewVSel;
9223
9224   return SDValue();
9225 }
9226
9227 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9228   SDValue N0 = N->getOperand(0);
9229   EVT VT = N->getValueType(0);
9230
9231   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9232     return Res;
9233
9234   // fold (aext (aext x)) -> (aext x)
9235   // fold (aext (zext x)) -> (zext x)
9236   // fold (aext (sext x)) -> (sext x)
9237   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9238       N0.getOpcode() == ISD::ZERO_EXTEND ||
9239       N0.getOpcode() == ISD::SIGN_EXTEND)
9240     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9241
9242   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9243   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9244   if (N0.getOpcode() == ISD::TRUNCATE) {
9245     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9246       SDNode *oye = N0.getOperand(0).getNode();
9247       if (NarrowLoad.getNode() != N0.getNode()) {
9248         CombineTo(N0.getNode(), NarrowLoad);
9249         // CombineTo deleted the truncate, if needed, but not what's under it.
9250         AddToWorklist(oye);
9251       }
9252       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9253     }
9254   }
9255
9256   // fold (aext (truncate x))
9257   if (N0.getOpcode() == ISD::TRUNCATE)
9258     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9259
9260   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9261   // if the trunc is not free.
9262   if (N0.getOpcode() == ISD::AND &&
9263       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9264       N0.getOperand(1).getOpcode() == ISD::Constant &&
9265       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9266                           N0.getValueType())) {
9267     SDLoc DL(N);
9268     SDValue X = N0.getOperand(0).getOperand(0);
9269     X = DAG.getAnyExtOrTrunc(X, DL, VT);
9270     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9271     Mask = Mask.zext(VT.getSizeInBits());
9272     return DAG.getNode(ISD::AND, DL, VT,
9273                        X, DAG.getConstant(Mask, DL, VT));
9274   }
9275
9276   // fold (aext (load x)) -> (aext (truncate (extload x)))
9277   // None of the supported targets knows how to perform load and any_ext
9278   // on vectors in one instruction.  We only perform this transformation on
9279   // scalars.
9280   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9281       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9282       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9283     bool DoXform = true;
9284     SmallVector<SDNode*, 4> SetCCs;
9285     if (!N0.hasOneUse())
9286       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9287                                         TLI);
9288     if (DoXform) {
9289       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9290       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9291                                        LN0->getChain(),
9292                                        LN0->getBasePtr(), N0.getValueType(),
9293                                        LN0->getMemOperand());
9294       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9295       // If the load value is used only by N, replace it via CombineTo N.
9296       bool NoReplaceTrunc = N0.hasOneUse();
9297       CombineTo(N, ExtLoad);
9298       if (NoReplaceTrunc) {
9299         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9300         recursivelyDeleteUnusedNodes(LN0);
9301       } else {
9302         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9303                                     N0.getValueType(), ExtLoad);
9304         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9305       }
9306       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9307     }
9308   }
9309
9310   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9311   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9312   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
9313   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9314       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9315     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9316     ISD::LoadExtType ExtType = LN0->getExtensionType();
9317     EVT MemVT = LN0->getMemoryVT();
9318     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9319       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9320                                        VT, LN0->getChain(), LN0->getBasePtr(),
9321                                        MemVT, LN0->getMemOperand());
9322       CombineTo(N, ExtLoad);
9323       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9324       recursivelyDeleteUnusedNodes(LN0);
9325       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9326     }
9327   }
9328
9329   if (N0.getOpcode() == ISD::SETCC) {
9330     // For vectors:
9331     // aext(setcc) -> vsetcc
9332     // aext(setcc) -> truncate(vsetcc)
9333     // aext(setcc) -> aext(vsetcc)
9334     // Only do this before legalize for now.
9335     if (VT.isVector() && !LegalOperations) {
9336       EVT N00VT = N0.getOperand(0).getValueType();
9337       if (getSetCCResultType(N00VT) == N0.getValueType())
9338         return SDValue();
9339
9340       // We know that the # elements of the results is the same as the
9341       // # elements of the compare (and the # elements of the compare result
9342       // for that matter).  Check to see that they are the same size.  If so,
9343       // we know that the element size of the sext'd result matches the
9344       // element size of the compare operands.
9345       if (VT.getSizeInBits() == N00VT.getSizeInBits())
9346         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9347                              N0.getOperand(1),
9348                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
9349
9350       // If the desired elements are smaller or larger than the source
9351       // elements we can use a matching integer vector type and then
9352       // truncate/any extend
9353       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9354       SDValue VsetCC =
9355         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9356                       N0.getOperand(1),
9357                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
9358       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9359     }
9360
9361     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9362     SDLoc DL(N);
9363     if (SDValue SCC = SimplifySelectCC(
9364             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9365             DAG.getConstant(0, DL, VT),
9366             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9367       return SCC;
9368   }
9369
9370   return SDValue();
9371 }
9372
9373 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9374   unsigned Opcode = N->getOpcode();
9375   SDValue N0 = N->getOperand(0);
9376   SDValue N1 = N->getOperand(1);
9377   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9378
9379   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9380   if (N0.getOpcode() == Opcode &&
9381       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9382     return N0;
9383
9384   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9385       N0.getOperand(0).getOpcode() == Opcode) {
9386     // We have an assert, truncate, assert sandwich. Make one stronger assert
9387     // by asserting on the smallest asserted type to the larger source type.
9388     // This eliminates the later assert:
9389     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9390     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9391     SDValue BigA = N0.getOperand(0);
9392     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9393     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9394            "Asserting zero/sign-extended bits to a type larger than the "
9395            "truncated destination does not provide information");
9396
9397     SDLoc DL(N);
9398     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9399     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9400     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9401                                     BigA.getOperand(0), MinAssertVTVal);
9402     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9403   }
9404
9405   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
9406   // than X. Just move the AssertZext in front of the truncate and drop the
9407   // AssertSExt.
9408   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9409       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9410       Opcode == ISD::AssertZext) {
9411     SDValue BigA = N0.getOperand(0);
9412     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9413     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9414            "Asserting zero/sign-extended bits to a type larger than the "
9415            "truncated destination does not provide information");
9416
9417     if (AssertVT.bitsLT(BigA_AssertVT)) {
9418       SDLoc DL(N);
9419       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9420                                       BigA.getOperand(0), N1);
9421       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9422     }
9423   }
9424
9425   return SDValue();
9426 }
9427
9428 /// If the result of a wider load is shifted to right of N  bits and then
9429 /// truncated to a narrower type and where N is a multiple of number of bits of
9430 /// the narrower type, transform it to a narrower load from address + N / num of
9431 /// bits of new type. Also narrow the load if the result is masked with an AND
9432 /// to effectively produce a smaller type. If the result is to be extended, also
9433 /// fold the extension to form a extending load.
9434 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9435   unsigned Opc = N->getOpcode();
9436
9437   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9438   SDValue N0 = N->getOperand(0);
9439   EVT VT = N->getValueType(0);
9440   EVT ExtVT = VT;
9441
9442   // This transformation isn't valid for vector loads.
9443   if (VT.isVector())
9444     return SDValue();
9445
9446   unsigned ShAmt = 0;
9447   bool HasShiftedOffset = false;
9448   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9449   // extended to VT.
9450   if (Opc == ISD::SIGN_EXTEND_INREG) {
9451     ExtType = ISD::SEXTLOAD;
9452     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9453   } else if (Opc == ISD::SRL) {
9454     // Another special-case: SRL is basically zero-extending a narrower value,
9455     // or it maybe shifting a higher subword, half or byte into the lowest
9456     // bits.
9457     ExtType = ISD::ZEXTLOAD;
9458     N0 = SDValue(N, 0);
9459
9460     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9461     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9462     if (!N01 || !LN0)
9463       return SDValue();
9464
9465     uint64_t ShiftAmt = N01->getZExtValue();
9466     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9467     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9468       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9469     else
9470       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9471                                 VT.getSizeInBits() - ShiftAmt);
9472   } else if (Opc == ISD::AND) {
9473     // An AND with a constant mask is the same as a truncate + zero-extend.
9474     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9475     if (!AndC)
9476       return SDValue();
9477
9478     const APInt &Mask = AndC->getAPIntValue();
9479     unsigned ActiveBits = 0;
9480     if (Mask.isMask()) {
9481       ActiveBits = Mask.countTrailingOnes();
9482     } else if (Mask.isShiftedMask()) {
9483       ShAmt = Mask.countTrailingZeros();
9484       APInt ShiftedMask = Mask.lshr(ShAmt);
9485       ActiveBits = ShiftedMask.countTrailingOnes();
9486       HasShiftedOffset = true;
9487     } else
9488       return SDValue();
9489
9490     ExtType = ISD::ZEXTLOAD;
9491     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9492   }
9493
9494   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9495     SDValue SRL = N0;
9496     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9497       ShAmt = ConstShift->getZExtValue();
9498       unsigned EVTBits = ExtVT.getSizeInBits();
9499       // Is the shift amount a multiple of size of VT?
9500       if ((ShAmt & (EVTBits-1)) == 0) {
9501         N0 = N0.getOperand(0);
9502         // Is the load width a multiple of size of VT?
9503         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9504           return SDValue();
9505       }
9506
9507       // At this point, we must have a load or else we can't do the transform.
9508       if (!isa<LoadSDNode>(N0)) return SDValue();
9509
9510       auto *LN0 = cast<LoadSDNode>(N0);
9511
9512       // Because a SRL must be assumed to *need* to zero-extend the high bits
9513       // (as opposed to anyext the high bits), we can't combine the zextload
9514       // lowering of SRL and an sextload.
9515       if (LN0->getExtensionType() == ISD::SEXTLOAD)
9516         return SDValue();
9517
9518       // If the shift amount is larger than the input type then we're not
9519       // accessing any of the loaded bytes.  If the load was a zextload/extload
9520       // then the result of the shift+trunc is zero/undef (handled elsewhere).
9521       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9522         return SDValue();
9523
9524       // If the SRL is only used by a masking AND, we may be able to adjust
9525       // the ExtVT to make the AND redundant.
9526       SDNode *Mask = *(SRL->use_begin());
9527       if (Mask->getOpcode() == ISD::AND &&
9528           isa<ConstantSDNode>(Mask->getOperand(1))) {
9529         const APInt &ShiftMask =
9530           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9531         if (ShiftMask.isMask()) {
9532           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9533                                            ShiftMask.countTrailingOnes());
9534           // If the mask is smaller, recompute the type.
9535           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9536               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9537             ExtVT = MaskedVT;
9538         }
9539       }
9540     }
9541   }
9542
9543   // If the load is shifted left (and the result isn't shifted back right),
9544   // we can fold the truncate through the shift.
9545   unsigned ShLeftAmt = 0;
9546   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9547       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9548     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9549       ShLeftAmt = N01->getZExtValue();
9550       N0 = N0.getOperand(0);
9551     }
9552   }
9553
9554   // If we haven't found a load, we can't narrow it.
9555   if (!isa<LoadSDNode>(N0))
9556     return SDValue();
9557
9558   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9559   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9560     return SDValue();
9561
9562   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9563     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9564     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9565     return LVTStoreBits - EVTStoreBits - ShAmt;
9566   };
9567
9568   // For big endian targets, we need to adjust the offset to the pointer to
9569   // load the correct bytes.
9570   if (DAG.getDataLayout().isBigEndian())
9571     ShAmt = AdjustBigEndianShift(ShAmt);
9572
9573   EVT PtrType = N0.getOperand(1).getValueType();
9574   uint64_t PtrOff = ShAmt / 8;
9575   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9576   SDLoc DL(LN0);
9577   // The original load itself didn't wrap, so an offset within it doesn't.
9578   SDNodeFlags Flags;
9579   Flags.setNoUnsignedWrap(true);
9580   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9581                                PtrType, LN0->getBasePtr(),
9582                                DAG.getConstant(PtrOff, DL, PtrType),
9583                                Flags);
9584   AddToWorklist(NewPtr.getNode());
9585
9586   SDValue Load;
9587   if (ExtType == ISD::NON_EXTLOAD)
9588     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9589                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9590                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9591   else
9592     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9593                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9594                           NewAlign, LN0->getMemOperand()->getFlags(),
9595                           LN0->getAAInfo());
9596
9597   // Replace the old load's chain with the new load's chain.
9598   WorklistRemover DeadNodes(*this);
9599   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9600
9601   // Shift the result left, if we've swallowed a left shift.
9602   SDValue Result = Load;
9603   if (ShLeftAmt != 0) {
9604     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9605     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9606       ShImmTy = VT;
9607     // If the shift amount is as large as the result size (but, presumably,
9608     // no larger than the source) then the useful bits of the result are
9609     // zero; we can't simply return the shortened shift, because the result
9610     // of that operation is undefined.
9611     SDLoc DL(N0);
9612     if (ShLeftAmt >= VT.getSizeInBits())
9613       Result = DAG.getConstant(0, DL, VT);
9614     else
9615       Result = DAG.getNode(ISD::SHL, DL, VT,
9616                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9617   }
9618
9619   if (HasShiftedOffset) {
9620     // Recalculate the shift amount after it has been altered to calculate
9621     // the offset.
9622     if (DAG.getDataLayout().isBigEndian())
9623       ShAmt = AdjustBigEndianShift(ShAmt);
9624
9625     // We're using a shifted mask, so the load now has an offset. This means
9626     // that data has been loaded into the lower bytes than it would have been
9627     // before, so we need to shl the loaded data into the correct position in the
9628     // register.
9629     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9630     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9631     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9632   }
9633
9634   // Return the new loaded value.
9635   return Result;
9636 }
9637
9638 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9639   SDValue N0 = N->getOperand(0);
9640   SDValue N1 = N->getOperand(1);
9641   EVT VT = N->getValueType(0);
9642   EVT EVT = cast<VTSDNode>(N1)->getVT();
9643   unsigned VTBits = VT.getScalarSizeInBits();
9644   unsigned EVTBits = EVT.getScalarSizeInBits();
9645
9646   if (N0.isUndef())
9647     return DAG.getUNDEF(VT);
9648
9649   // fold (sext_in_reg c1) -> c1
9650   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9651     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9652
9653   // If the input is already sign extended, just drop the extension.
9654   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9655     return N0;
9656
9657   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9658   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9659       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9660     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9661                        N0.getOperand(0), N1);
9662
9663   // fold (sext_in_reg (sext x)) -> (sext x)
9664   // fold (sext_in_reg (aext x)) -> (sext x)
9665   // if x is small enough or if we know that x has more than 1 sign bit and the
9666   // sign_extend_inreg is extending from one of them.
9667   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9668     SDValue N00 = N0.getOperand(0);
9669     unsigned N00Bits = N00.getScalarValueSizeInBits();
9670     if ((N00Bits <= EVTBits ||
9671          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9672         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9673       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9674   }
9675
9676   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9677   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9678        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9679        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9680       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9681     if (!LegalOperations ||
9682         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9683       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9684                          N0.getOperand(0));
9685   }
9686
9687   // fold (sext_in_reg (zext x)) -> (sext x)
9688   // iff we are extending the source sign bit.
9689   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9690     SDValue N00 = N0.getOperand(0);
9691     if (N00.getScalarValueSizeInBits() == EVTBits &&
9692         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9693       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9694   }
9695
9696   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9697   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9698     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9699
9700   // fold operands of sext_in_reg based on knowledge that the top bits are not
9701   // demanded.
9702   if (SimplifyDemandedBits(SDValue(N, 0)))
9703     return SDValue(N, 0);
9704
9705   // fold (sext_in_reg (load x)) -> (smaller sextload x)
9706   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9707   if (SDValue NarrowLoad = ReduceLoadWidth(N))
9708     return NarrowLoad;
9709
9710   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9711   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9712   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9713   if (N0.getOpcode() == ISD::SRL) {
9714     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9715       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9716         // We can turn this into an SRA iff the input to the SRL is already sign
9717         // extended enough.
9718         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9719         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9720           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9721                              N0.getOperand(0), N0.getOperand(1));
9722       }
9723   }
9724
9725   // fold (sext_inreg (extload x)) -> (sextload x)
9726   // If sextload is not supported by target, we can only do the combine when
9727   // load has one use. Doing otherwise can block folding the extload with other
9728   // extends that the target does support.
9729   if (ISD::isEXTLoad(N0.getNode()) &&
9730       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9731       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9732       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9733         N0.hasOneUse()) ||
9734        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9735     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9736     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9737                                      LN0->getChain(),
9738                                      LN0->getBasePtr(), EVT,
9739                                      LN0->getMemOperand());
9740     CombineTo(N, ExtLoad);
9741     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9742     AddToWorklist(ExtLoad.getNode());
9743     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9744   }
9745   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9746   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9747       N0.hasOneUse() &&
9748       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9749       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9750        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9751     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9752     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9753                                      LN0->getChain(),
9754                                      LN0->getBasePtr(), EVT,
9755                                      LN0->getMemOperand());
9756     CombineTo(N, ExtLoad);
9757     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9758     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9759   }
9760
9761   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9762   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9763     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9764                                            N0.getOperand(1), false))
9765       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9766                          BSwap, N1);
9767   }
9768
9769   return SDValue();
9770 }
9771
9772 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9773   SDValue N0 = N->getOperand(0);
9774   EVT VT = N->getValueType(0);
9775
9776   if (N0.isUndef())
9777     return DAG.getUNDEF(VT);
9778
9779   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9780     return Res;
9781
9782   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9783     return SDValue(N, 0);
9784
9785   return SDValue();
9786 }
9787
9788 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9789   SDValue N0 = N->getOperand(0);
9790   EVT VT = N->getValueType(0);
9791
9792   if (N0.isUndef())
9793     return DAG.getUNDEF(VT);
9794
9795   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9796     return Res;
9797
9798   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9799     return SDValue(N, 0);
9800
9801   return SDValue();
9802 }
9803
/// Combine a TRUNCATE node.
///
/// The folds below are tried in order against the truncate's operand (N0);
/// the first one that applies determines the result.
///
/// \returns the replacement value, SDValue(N, 0) when demanded-bits
/// simplification reported a change, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // Endianness decides which narrow vector element holds the low bits in the
  // extract/bitcast folds further down.
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;

  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
    // Only report a change if getNode produced something other than N.
    if (C.getNode() != N)
      return C;
  }

  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
    return SDValue();

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes (little-endian element index shown) --
  //   v4i32 b = BITCAST (v2i64 val)
  //   i32 y = EXTRACT_VECTOR_ELT(v4i32 b, i32 2)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    // Number of truncated-type elements that fit in one extracted element.
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Same total width as VecTy, but made of elements of the truncated type.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // Pick the first narrow element of the group on little-endian targets
      // and the last one on big-endian targets.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    SDValue Amt = N0.getOperand(1);
    KnownBits Known = DAG.computeKnownBits(Amt);
    unsigned Size = VT.getScalarSizeInBits();
    // Require that the shift amount's possibly-set bits fit in Log2_32(Size)
    // bits, per its known leading zeros.
    if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
      SDLoc SL(N);
      EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
      // Convert the shift amount to the shift-amount type of the narrow VT.
      if (AmtVT != Amt.getValueType()) {
        Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
        AddToWorklist(Amt.getNode());
      }
      return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {
    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th build_vector operand, starting at 0.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    APInt Mask =
        APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
    if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Redirect users of the old load's chain result to the new load.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Record the single non-undef operand (V at position Idx) and the
    // truncated type of every concat operand.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one member is non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    // Every operand is undef, so the truncate is undef as well.
    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      // Rebuild the concat from narrow undefs plus the truncated operand.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      // Element 0 on little-endian targets, the last element on big-endian.
      unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(Idx, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
  // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
  // When the adde's carry is not used.
  if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
      N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
    SDLoc SL(N);
    auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
    auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
    // Keep the original type of the second (carry) result.
    auto VTs = DAG.getVTList(VT, N0->getValueType(1));
    return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
  }

  // fold (truncate (extract_subvector(ext x))) ->
  //      (extract_subvector x)
  // TODO: This can be generalized to cover cases where the truncate and extract
  // do not fully cancel each other out.
  if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == ISD::SIGN_EXTEND ||
        N00.getOpcode() == ISD::ZERO_EXTEND ||
        N00.getOpcode() == ISD::ANY_EXTEND) {
      // Only when the pre-extension element type equals the truncated one.
      if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
          VT.getVectorElementType())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
                           N00.getOperand(0), N0.getOperand(1));
    }
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Narrow a suitable binary operation with a non-opaque constant operand by
  // moving it ahead of the truncate. This is limited to pre-legalization
  // because targets may prefer a wider type during later combines and invert
  // this transform.
  switch (N0.getOpcode()) {
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    if (!LegalOperations && N0.hasOneUse() &&
        (isConstantOrConstantVector(N0.getOperand(0), true) ||
         isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious to not create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
      if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
        SDLoc DL(N);
        SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
        SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
        return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
      }
    }
  }

  return SDValue();
}
10101
10102 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10103   SDValue Elt = N->getOperand(i);
10104   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10105     return Elt.getNode();
10106   return Elt.getOperand(Elt.getResNo()).getNode();
10107 }
10108
10109 /// build_pair (load, load) -> load
10110 /// if load locations are consecutive.
10111 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10112   assert(N->getOpcode() == ISD::BUILD_PAIR);
10113
10114   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10115   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10116
10117   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10118   // most significant part in elt 1. So when combining into one large load, we
10119   // need to consider the endianness.
10120   if (DAG.getDataLayout().isBigEndian())
10121     std::swap(LD1, LD2);
10122
10123   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10124       LD1->getAddressSpace() != LD2->getAddressSpace())
10125     return SDValue();
10126   EVT LD1VT = LD1->getValueType(0);
10127   unsigned LD1Bytes = LD1VT.getStoreSize();
10128   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10129       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10130     unsigned Align = LD1->getAlignment();
10131     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10132         VT.getTypeForEVT(*DAG.getContext()));
10133
10134     if (NewAlign <= Align &&
10135         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10136       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10137                          LD1->getPointerInfo(), Align);
10138   }
10139
10140   return SDValue();
10141 }
10142
10143 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10144   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10145   // and Lo parts; on big-endian machines it doesn't.
10146   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10147 }
10148
10149 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10150                                     const TargetLowering &TLI) {
10151   // If this is not a bitcast to an FP type or if the target doesn't have
10152   // IEEE754-compliant FP logic, we're done.
10153   EVT VT = N->getValueType(0);
10154   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10155     return SDValue();
10156
10157   // TODO: Handle cases where the integer constant is a different scalar
10158   // bitwidth to the FP.
10159   SDValue N0 = N->getOperand(0);
10160   EVT SourceVT = N0.getValueType();
10161   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10162     return SDValue();
10163
10164   unsigned FPOpcode;
10165   APInt SignMask;
10166   switch (N0.getOpcode()) {
10167   case ISD::AND:
10168     FPOpcode = ISD::FABS;
10169     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10170     break;
10171   case ISD::XOR:
10172     FPOpcode = ISD::FNEG;
10173     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10174     break;
10175   case ISD::OR:
10176     FPOpcode = ISD::FABS;
10177     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10178     break;
10179   default:
10180     return SDValue();
10181   }
10182
10183   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10184   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10185   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10186   //   fneg (fabs X)
10187   SDValue LogicOp0 = N0.getOperand(0);
10188   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10189   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10190       LogicOp0.getOpcode() == ISD::BITCAST &&
10191       LogicOp0.getOperand(0).getValueType() == VT) {
10192     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10193     NumFPLogicOpsConv++;
10194     if (N0.getOpcode() == ISD::OR)
10195       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10196     return FPOp;
10197   }
10198
10199   return SDValue();
10200 }
10201
10202 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10203   SDValue N0 = N->getOperand(0);
10204   EVT VT = N->getValueType(0);
10205
10206   if (N0.isUndef())
10207     return DAG.getUNDEF(VT);
10208
10209   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
10210   // Only do this before legalize types, since we might create an illegal
10211   // scalar type. Even if we knew we wouldn't create an illegal scalar type
10212   // we can only do this before legalize ops, since the target maybe
10213   // depending on the bitcast.
10214   // First check to see if this is all constant.
10215   if (!LegalTypes &&
10216       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10217       VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
10218     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10219                                              VT.getVectorElementType());
10220
10221   // If the input is a constant, let getNode fold it.
10222   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
10223     // If we can't allow illegal operations, we need to check that this is just
10224     // a fp -> int or int -> conversion and that the resulting operation will
10225     // be legal.
10226     if (!LegalOperations ||
10227         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10228          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10229         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10230          TLI.isOperationLegal(ISD::Constant, VT))) {
10231       SDValue C = DAG.getBitcast(VT, N0);
10232       if (C.getNode() != N)
10233         return C;
10234     }
10235   }
10236
10237   // (conv (conv x, t1), t2) -> (conv x, t2)
10238   if (N0.getOpcode() == ISD::BITCAST)
10239     return DAG.getBitcast(VT, N0.getOperand(0));
10240
10241   // fold (conv (load x)) -> (load (conv*)x)
10242   // If the resultant load doesn't need a higher alignment than the original!
10243   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10244       // Do not remove the cast if the types differ in endian layout.
10245       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10246           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10247       // If the load is volatile, we only want to change the load type if the
10248       // resulting load is legal. Otherwise we might increase the number of
10249       // memory accesses. We don't care if the original type was legal or not
10250       // as we assume software couldn't rely on the number of accesses of an
10251       // illegal type.
10252       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10253        TLI.isOperationLegal(ISD::LOAD, VT)) &&
10254       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10255     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10256     unsigned OrigAlign = LN0->getAlignment();
10257
10258     bool Fast = false;
10259     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10260                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
10261         Fast) {
10262       SDValue Load =
10263           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10264                       LN0->getPointerInfo(), OrigAlign,
10265                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10266       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10267       return Load;
10268     }
10269   }
10270
10271   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10272     return V;
10273
10274   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10275   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10276   //
10277   // For ppc_fp128:
10278   // fold (bitcast (fneg x)) ->
10279   //     flipbit = signbit
10280   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10281   //
10282   // fold (bitcast (fabs x)) ->
10283   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
10284   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10285   // This often reduces constant pool loads.
10286   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10287        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10288       N0.getNode()->hasOneUse() && VT.isInteger() &&
10289       !VT.isVector() && !N0.getValueType().isVector()) {
10290     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10291     AddToWorklist(NewConv.getNode());
10292
10293     SDLoc DL(N);
10294     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10295       assert(VT.getSizeInBits() == 128);
10296       SDValue SignBit = DAG.getConstant(
10297           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10298       SDValue FlipBit;
10299       if (N0.getOpcode() == ISD::FNEG) {
10300         FlipBit = SignBit;
10301         AddToWorklist(FlipBit.getNode());
10302       } else {
10303         assert(N0.getOpcode() == ISD::FABS);
10304         SDValue Hi =
10305             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10306                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10307                                               SDLoc(NewConv)));
10308         AddToWorklist(Hi.getNode());
10309         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10310         AddToWorklist(FlipBit.getNode());
10311       }
10312       SDValue FlipBits =
10313           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10314       AddToWorklist(FlipBits.getNode());
10315       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10316     }
10317     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10318     if (N0.getOpcode() == ISD::FNEG)
10319       return DAG.getNode(ISD::XOR, DL, VT,
10320                          NewConv, DAG.getConstant(SignBit, DL, VT));
10321     assert(N0.getOpcode() == ISD::FABS);
10322     return DAG.getNode(ISD::AND, DL, VT,
10323                        NewConv, DAG.getConstant(~SignBit, DL, VT));
10324   }
10325
10326   // fold (bitconvert (fcopysign cst, x)) ->
10327   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
10328   // Note that we don't handle (copysign x, cst) because this can always be
10329   // folded to an fneg or fabs.
10330   //
10331   // For ppc_fp128:
10332   // fold (bitcast (fcopysign cst, x)) ->
10333   //     flipbit = (and (extract_element
10334   //                     (xor (bitcast cst), (bitcast x)), 0),
10335   //                    signbit)
10336   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
10337   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10338       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10339       VT.isInteger() && !VT.isVector()) {
10340     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10341     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10342     if (isTypeLegal(IntXVT)) {
10343       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10344       AddToWorklist(X.getNode());
10345
10346       // If X has a different width than the result/lhs, sext it or truncate it.
10347       unsigned VTWidth = VT.getSizeInBits();
10348       if (OrigXWidth < VTWidth) {
10349         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10350         AddToWorklist(X.getNode());
10351       } else if (OrigXWidth > VTWidth) {
10352         // To get the sign bit in the right place, we have to shift it right
10353         // before truncating.
10354         SDLoc DL(X);
10355         X = DAG.getNode(ISD::SRL, DL,
10356                         X.getValueType(), X,
10357                         DAG.getConstant(OrigXWidth-VTWidth, DL,
10358                                         X.getValueType()));
10359         AddToWorklist(X.getNode());
10360         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10361         AddToWorklist(X.getNode());
10362       }
10363
10364       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10365         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10366         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10367         AddToWorklist(Cst.getNode());
10368         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10369         AddToWorklist(X.getNode());
10370         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10371         AddToWorklist(XorResult.getNode());
10372         SDValue XorResult64 = DAG.getNode(
10373             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10374             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10375                                   SDLoc(XorResult)));
10376         AddToWorklist(XorResult64.getNode());
10377         SDValue FlipBit =
10378             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10379                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10380         AddToWorklist(FlipBit.getNode());
10381         SDValue FlipBits =
10382             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10383         AddToWorklist(FlipBits.getNode());
10384         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10385       }
10386       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10387       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10388                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
10389       AddToWorklist(X.getNode());
10390
10391       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10392       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10393                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10394       AddToWorklist(Cst.getNode());
10395
10396       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10397     }
10398   }
10399
10400   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10401   if (N0.getOpcode() == ISD::BUILD_PAIR)
10402     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10403       return CombineLD;
10404
10405   // Remove double bitcasts from shuffles - this is often a legacy of
10406   // XformToShuffleWithZero being used to combine bitmaskings (of
10407   // float vectors bitcast to integer vectors) into shuffles.
10408   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10409   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10410       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10411       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10412       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10413     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10414
10415     // If operands are a bitcast, peek through if it casts the original VT.
10416     // If operands are a constant, just bitcast back to original VT.
10417     auto PeekThroughBitcast = [&](SDValue Op) {
10418       if (Op.getOpcode() == ISD::BITCAST &&
10419           Op.getOperand(0).getValueType() == VT)
10420         return SDValue(Op.getOperand(0));
10421       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10422           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10423         return DAG.getBitcast(VT, Op);
10424       return SDValue();
10425     };
10426
10427     // FIXME: If either input vector is bitcast, try to convert the shuffle to
10428     // the result type of this bitcast. This would eliminate at least one
10429     // bitcast. See the transform in InstCombine.
10430     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10431     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10432     if (!(SV0 && SV1))
10433       return SDValue();
10434
10435     int MaskScale =
10436         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10437     SmallVector<int, 8> NewMask;
10438     for (int M : SVN->getMask())
10439       for (int i = 0; i != MaskScale; ++i)
10440         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
10441
10442     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10443     if (!LegalMask) {
10444       std::swap(SV0, SV1);
10445       ShuffleVectorSDNode::commuteMask(NewMask);
10446       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10447     }
10448
10449     if (LegalMask)
10450       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10451   }
10452
10453   return SDValue();
10454 }
10455
10456 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10457   EVT VT = N->getValueType(0);
10458   return CombineConsecutiveLoads(N, VT);
10459 }
10460
10461 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10462 /// operands. DstEltVT indicates the destination element value type.
10463 SDValue DAGCombiner::
10464 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10465   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10466
10467   // If this is already the right type, we're done.
10468   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10469
10470   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10471   unsigned DstBitSize = DstEltVT.getSizeInBits();
10472
10473   // If this is a conversion of N elements of one type to N elements of another
10474   // type, convert each element.  This handles FP<->INT cases.
10475   if (SrcBitSize == DstBitSize) {
10476     SmallVector<SDValue, 8> Ops;
10477     for (SDValue Op : BV->op_values()) {
10478       // If the vector element type is not legal, the BUILD_VECTOR operands
10479       // are promoted and implicitly truncated.  Make that explicit here.
10480       if (Op.getValueType() != SrcEltVT)
10481         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10482       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10483       AddToWorklist(Ops.back().getNode());
10484     }
10485     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10486                               BV->getValueType(0).getVectorNumElements());
10487     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10488   }
10489
10490   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10491   // handle annoying details of growing/shrinking FP values, we convert them to
10492   // int first.
10493   if (SrcEltVT.isFloatingPoint()) {
10494     // Convert the input float vector to a int vector where the elements are the
10495     // same sizes.
10496     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10497     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10498     SrcEltVT = IntVT;
10499   }
10500
10501   // Now we know the input is an integer vector.  If the output is a FP type,
10502   // convert to integer first, then to FP of the right size.
10503   if (DstEltVT.isFloatingPoint()) {
10504     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10505     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10506
10507     // Next, convert to FP elements of the same size.
10508     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10509   }
10510
10511   SDLoc DL(BV);
10512
10513   // Okay, we know the src/dst types are both integers of differing types.
10514   // Handling growing first.
10515   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10516   if (SrcBitSize < DstBitSize) {
10517     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10518
10519     SmallVector<SDValue, 8> Ops;
10520     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10521          i += NumInputsPerOutput) {
10522       bool isLE = DAG.getDataLayout().isLittleEndian();
10523       APInt NewBits = APInt(DstBitSize, 0);
10524       bool EltIsUndef = true;
10525       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10526         // Shift the previously computed bits over.
10527         NewBits <<= SrcBitSize;
10528         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10529         if (Op.isUndef()) continue;
10530         EltIsUndef = false;
10531
10532         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10533                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10534       }
10535
10536       if (EltIsUndef)
10537         Ops.push_back(DAG.getUNDEF(DstEltVT));
10538       else
10539         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10540     }
10541
10542     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10543     return DAG.getBuildVector(VT, DL, Ops);
10544   }
10545
10546   // Finally, this must be the case where we are shrinking elements: each input
10547   // turns into multiple outputs.
10548   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10549   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10550                             NumOutputsPerInput*BV->getNumOperands());
10551   SmallVector<SDValue, 8> Ops;
10552
10553   for (const SDValue &Op : BV->op_values()) {
10554     if (Op.isUndef()) {
10555       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10556       continue;
10557     }
10558
10559     APInt OpVal = cast<ConstantSDNode>(Op)->
10560                   getAPIntValue().zextOrTrunc(SrcBitSize);
10561
10562     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10563       APInt ThisVal = OpVal.trunc(DstBitSize);
10564       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10565       OpVal.lshrInPlace(DstBitSize);
10566     }
10567
10568     // For big endian targets, swap the order of the pieces of each element.
10569     if (DAG.getDataLayout().isBigEndian())
10570       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10571   }
10572
10573   return DAG.getBuildVector(VT, DL, Ops);
10574 }
10575
10576 static bool isContractable(SDNode *N) {
10577   SDNodeFlags F = N->getFlags();
10578   return F.hasAllowContract() || F.hasAllowReassociation();
10579 }
10580
10581 /// Try to perform FMA combining on a given FADD node.
10582 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10583   SDValue N0 = N->getOperand(0);
10584   SDValue N1 = N->getOperand(1);
10585   EVT VT = N->getValueType(0);
10586   SDLoc SL(N);
10587
10588   const TargetOptions &Options = DAG.getTarget().Options;
10589
10590   // Floating-point multiply-add with intermediate rounding.
10591   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10592
10593   // Floating-point multiply-add without intermediate rounding.
10594   bool HasFMA =
10595       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10596       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10597
10598   // No valid opcode, do not combine.
10599   if (!HasFMAD && !HasFMA)
10600     return SDValue();
10601
10602   SDNodeFlags Flags = N->getFlags();
10603   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10604   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10605                               CanFuse || HasFMAD);
10606   // If the addition is not contractable, do not combine.
10607   if (!AllowFusionGlobally && !isContractable(N))
10608     return SDValue();
10609
10610   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10611   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10612     return SDValue();
10613
10614   // Always prefer FMAD to FMA for precision.
10615   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10616   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10617
10618   // Is the node an FMUL and contractable either due to global flags or
10619   // SDNodeFlags.
10620   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10621     if (N.getOpcode() != ISD::FMUL)
10622       return false;
10623     return AllowFusionGlobally || isContractable(N.getNode());
10624   };
10625   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10626   // prefer to fold the multiply with fewer uses.
10627   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10628     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10629       std::swap(N0, N1);
10630   }
10631
10632   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10633   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10634     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10635                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10636   }
10637
10638   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10639   // Note: Commutes FADD operands.
10640   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10641     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10642                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10643   }
10644
10645   // Look through FP_EXTEND nodes to do more combining.
10646
10647   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10648   if (N0.getOpcode() == ISD::FP_EXTEND) {
10649     SDValue N00 = N0.getOperand(0);
10650     if (isContractableFMUL(N00) &&
10651         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10652       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10653                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10654                                      N00.getOperand(0)),
10655                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10656                                      N00.getOperand(1)), N1, Flags);
10657     }
10658   }
10659
10660   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10661   // Note: Commutes FADD operands.
10662   if (N1.getOpcode() == ISD::FP_EXTEND) {
10663     SDValue N10 = N1.getOperand(0);
10664     if (isContractableFMUL(N10) &&
10665         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10666       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10667                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10668                                      N10.getOperand(0)),
10669                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10670                                      N10.getOperand(1)), N0, Flags);
10671     }
10672   }
10673
10674   // More folding opportunities when target permits.
10675   if (Aggressive) {
10676     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
10677     if (CanFuse &&
10678         N0.getOpcode() == PreferredFusedOpcode &&
10679         N0.getOperand(2).getOpcode() == ISD::FMUL &&
10680         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10681       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10682                          N0.getOperand(0), N0.getOperand(1),
10683                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10684                                      N0.getOperand(2).getOperand(0),
10685                                      N0.getOperand(2).getOperand(1),
10686                                      N1, Flags), Flags);
10687     }
10688
10689     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10690     if (CanFuse &&
10691         N1->getOpcode() == PreferredFusedOpcode &&
10692         N1.getOperand(2).getOpcode() == ISD::FMUL &&
10693         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10694       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10695                          N1.getOperand(0), N1.getOperand(1),
10696                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10697                                      N1.getOperand(2).getOperand(0),
10698                                      N1.getOperand(2).getOperand(1),
10699                                      N0, Flags), Flags);
10700     }
10701
10702
10703     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10704     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
10705     auto FoldFAddFMAFPExtFMul = [&] (
10706       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10707       SDNodeFlags Flags) {
10708       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10709                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10710                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10711                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10712                                      Z, Flags), Flags);
10713     };
10714     if (N0.getOpcode() == PreferredFusedOpcode) {
10715       SDValue N02 = N0.getOperand(2);
10716       if (N02.getOpcode() == ISD::FP_EXTEND) {
10717         SDValue N020 = N02.getOperand(0);
10718         if (isContractableFMUL(N020) &&
10719             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10720           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10721                                       N020.getOperand(0), N020.getOperand(1),
10722                                       N1, Flags);
10723         }
10724       }
10725     }
10726
10727     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10728     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10729     // FIXME: This turns two single-precision and one double-precision
10730     // operation into two double-precision operations, which might not be
10731     // interesting for all targets, especially GPUs.
10732     auto FoldFAddFPExtFMAFMul = [&] (
10733       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10734       SDNodeFlags Flags) {
10735       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10736                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10737                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10738                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10739                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10740                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10741                                      Z, Flags), Flags);
10742     };
10743     if (N0.getOpcode() == ISD::FP_EXTEND) {
10744       SDValue N00 = N0.getOperand(0);
10745       if (N00.getOpcode() == PreferredFusedOpcode) {
10746         SDValue N002 = N00.getOperand(2);
10747         if (isContractableFMUL(N002) &&
10748             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10749           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10750                                       N002.getOperand(0), N002.getOperand(1),
10751                                       N1, Flags);
10752         }
10753       }
10754     }
10755
10756     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10757     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
10758     if (N1.getOpcode() == PreferredFusedOpcode) {
10759       SDValue N12 = N1.getOperand(2);
10760       if (N12.getOpcode() == ISD::FP_EXTEND) {
10761         SDValue N120 = N12.getOperand(0);
10762         if (isContractableFMUL(N120) &&
10763             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10764           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10765                                       N120.getOperand(0), N120.getOperand(1),
10766                                       N0, Flags);
10767         }
10768       }
10769     }
10770
10771     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10772     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10773     // FIXME: This turns two single-precision and one double-precision
10774     // operation into two double-precision operations, which might not be
10775     // interesting for all targets, especially GPUs.
10776     if (N1.getOpcode() == ISD::FP_EXTEND) {
10777       SDValue N10 = N1.getOperand(0);
10778       if (N10.getOpcode() == PreferredFusedOpcode) {
10779         SDValue N102 = N10.getOperand(2);
10780         if (isContractableFMUL(N102) &&
10781             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10782           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10783                                       N102.getOperand(0), N102.getOperand(1),
10784                                       N0, Flags);
10785         }
10786       }
10787     }
10788   }
10789
10790   return SDValue();
10791 }
10792
10793 /// Try to perform FMA combining on a given FSUB node.
10794 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10795   SDValue N0 = N->getOperand(0);
10796   SDValue N1 = N->getOperand(1);
10797   EVT VT = N->getValueType(0);
10798   SDLoc SL(N);
10799
10800   const TargetOptions &Options = DAG.getTarget().Options;
10801   // Floating-point multiply-add with intermediate rounding.
10802   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10803
10804   // Floating-point multiply-add without intermediate rounding.
10805   bool HasFMA =
10806       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10807       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10808
10809   // No valid opcode, do not combine.
10810   if (!HasFMAD && !HasFMA)
10811     return SDValue();
10812
10813   const SDNodeFlags Flags = N->getFlags();
10814   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10815   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10816                               CanFuse || HasFMAD);
10817
10818   // If the subtraction is not contractable, do not combine.
10819   if (!AllowFusionGlobally && !isContractable(N))
10820     return SDValue();
10821
10822   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10823   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10824     return SDValue();
10825
10826   // Always prefer FMAD to FMA for precision.
10827   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10828   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10829
10830   // Is the node an FMUL and contractable either due to global flags or
10831   // SDNodeFlags.
10832   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10833     if (N.getOpcode() != ISD::FMUL)
10834       return false;
10835     return AllowFusionGlobally || isContractable(N.getNode());
10836   };
10837
10838   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10839   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10840     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10841                        N0.getOperand(0), N0.getOperand(1),
10842                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10843   }
10844
10845   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10846   // Note: Commutes FSUB operands.
10847   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10848     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10849                        DAG.getNode(ISD::FNEG, SL, VT,
10850                                    N1.getOperand(0)),
10851                        N1.getOperand(1), N0, Flags);
10852   }
10853
10854   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10855   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10856       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10857     SDValue N00 = N0.getOperand(0).getOperand(0);
10858     SDValue N01 = N0.getOperand(0).getOperand(1);
10859     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10860                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10861                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10862   }
10863
10864   // Look through FP_EXTEND nodes to do more combining.
10865
10866   // fold (fsub (fpext (fmul x, y)), z)
10867   //   -> (fma (fpext x), (fpext y), (fneg z))
10868   if (N0.getOpcode() == ISD::FP_EXTEND) {
10869     SDValue N00 = N0.getOperand(0);
10870     if (isContractableFMUL(N00) &&
10871         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10872       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10873                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10874                                      N00.getOperand(0)),
10875                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10876                                      N00.getOperand(1)),
10877                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10878     }
10879   }
10880
10881   // fold (fsub x, (fpext (fmul y, z)))
10882   //   -> (fma (fneg (fpext y)), (fpext z), x)
10883   // Note: Commutes FSUB operands.
10884   if (N1.getOpcode() == ISD::FP_EXTEND) {
10885     SDValue N10 = N1.getOperand(0);
10886     if (isContractableFMUL(N10) &&
10887         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10888       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10889                          DAG.getNode(ISD::FNEG, SL, VT,
10890                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10891                                                  N10.getOperand(0))),
10892                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10893                                      N10.getOperand(1)),
10894                          N0, Flags);
10895     }
10896   }
10897
10898   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10899   //   -> (fneg (fma (fpext x), (fpext y), z))
10900   // Note: This could be removed with appropriate canonicalization of the
10901   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10902   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10903   // from implementing the canonicalization in visitFSUB.
10904   if (N0.getOpcode() == ISD::FP_EXTEND) {
10905     SDValue N00 = N0.getOperand(0);
10906     if (N00.getOpcode() == ISD::FNEG) {
10907       SDValue N000 = N00.getOperand(0);
10908       if (isContractableFMUL(N000) &&
10909           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10910         return DAG.getNode(ISD::FNEG, SL, VT,
10911                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10912                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10913                                                    N000.getOperand(0)),
10914                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10915                                                    N000.getOperand(1)),
10916                                        N1, Flags));
10917       }
10918     }
10919   }
10920
10921   // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
10923   // Note: This could be removed with appropriate canonicalization of the
10924   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10925   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10926   // from implementing the canonicalization in visitFSUB.
10927   if (N0.getOpcode() == ISD::FNEG) {
10928     SDValue N00 = N0.getOperand(0);
10929     if (N00.getOpcode() == ISD::FP_EXTEND) {
10930       SDValue N000 = N00.getOperand(0);
10931       if (isContractableFMUL(N000) &&
10932           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10933         return DAG.getNode(ISD::FNEG, SL, VT,
10934                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10935                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10936                                                    N000.getOperand(0)),
10937                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10938                                                    N000.getOperand(1)),
10939                                        N1, Flags));
10940       }
10941     }
10942   }
10943
10944   // More folding opportunities when target permits.
10945   if (Aggressive) {
10946     // fold (fsub (fma x, y, (fmul u, v)), z)
10947     //   -> (fma x, y (fma u, v, (fneg z)))
10948     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10949         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10950         N0.getOperand(2)->hasOneUse()) {
10951       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10952                          N0.getOperand(0), N0.getOperand(1),
10953                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10954                                      N0.getOperand(2).getOperand(0),
10955                                      N0.getOperand(2).getOperand(1),
10956                                      DAG.getNode(ISD::FNEG, SL, VT,
10957                                                  N1), Flags), Flags);
10958     }
10959
10960     // fold (fsub x, (fma y, z, (fmul u, v)))
10961     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10962     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10963         isContractableFMUL(N1.getOperand(2))) {
10964       SDValue N20 = N1.getOperand(2).getOperand(0);
10965       SDValue N21 = N1.getOperand(2).getOperand(1);
10966       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10967                          DAG.getNode(ISD::FNEG, SL, VT,
10968                                      N1.getOperand(0)),
10969                          N1.getOperand(1),
10970                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10971                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
10972                                      N21, N0, Flags), Flags);
10973     }
10974
10975
10976     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10977     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10978     if (N0.getOpcode() == PreferredFusedOpcode) {
10979       SDValue N02 = N0.getOperand(2);
10980       if (N02.getOpcode() == ISD::FP_EXTEND) {
10981         SDValue N020 = N02.getOperand(0);
10982         if (isContractableFMUL(N020) &&
10983             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10984           return DAG.getNode(PreferredFusedOpcode, SL, VT,
10985                              N0.getOperand(0), N0.getOperand(1),
10986                              DAG.getNode(PreferredFusedOpcode, SL, VT,
10987                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10988                                                      N020.getOperand(0)),
10989                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10990                                                      N020.getOperand(1)),
10991                                          DAG.getNode(ISD::FNEG, SL, VT,
10992                                                      N1), Flags), Flags);
10993         }
10994       }
10995     }
10996
10997     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10998     //   -> (fma (fpext x), (fpext y),
10999     //           (fma (fpext u), (fpext v), (fneg z)))
11000     // FIXME: This turns two single-precision and one double-precision
11001     // operation into two double-precision operations, which might not be
11002     // interesting for all targets, especially GPUs.
11003     if (N0.getOpcode() == ISD::FP_EXTEND) {
11004       SDValue N00 = N0.getOperand(0);
11005       if (N00.getOpcode() == PreferredFusedOpcode) {
11006         SDValue N002 = N00.getOperand(2);
11007         if (isContractableFMUL(N002) &&
11008             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11009           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11010                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11011                                          N00.getOperand(0)),
11012                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11013                                          N00.getOperand(1)),
11014                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11015                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11016                                                      N002.getOperand(0)),
11017                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11018                                                      N002.getOperand(1)),
11019                                          DAG.getNode(ISD::FNEG, SL, VT,
11020                                                      N1), Flags), Flags);
11021         }
11022       }
11023     }
11024
11025     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11026     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11027     if (N1.getOpcode() == PreferredFusedOpcode &&
11028         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11029       SDValue N120 = N1.getOperand(2).getOperand(0);
11030       if (isContractableFMUL(N120) &&
11031           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11032         SDValue N1200 = N120.getOperand(0);
11033         SDValue N1201 = N120.getOperand(1);
11034         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11035                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11036                            N1.getOperand(1),
11037                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11038                                        DAG.getNode(ISD::FNEG, SL, VT,
11039                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11040                                                                VT, N1200)),
11041                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11042                                                    N1201),
11043                                        N0, Flags), Flags);
11044       }
11045     }
11046
11047     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11048     //   -> (fma (fneg (fpext y)), (fpext z),
11049     //           (fma (fneg (fpext u)), (fpext v), x))
11050     // FIXME: This turns two single-precision and one double-precision
11051     // operation into two double-precision operations, which might not be
11052     // interesting for all targets, especially GPUs.
11053     if (N1.getOpcode() == ISD::FP_EXTEND &&
11054         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11055       SDValue CvtSrc = N1.getOperand(0);
11056       SDValue N100 = CvtSrc.getOperand(0);
11057       SDValue N101 = CvtSrc.getOperand(1);
11058       SDValue N102 = CvtSrc.getOperand(2);
11059       if (isContractableFMUL(N102) &&
11060           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11061         SDValue N1020 = N102.getOperand(0);
11062         SDValue N1021 = N102.getOperand(1);
11063         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11064                            DAG.getNode(ISD::FNEG, SL, VT,
11065                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11066                                                    N100)),
11067                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11068                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11069                                        DAG.getNode(ISD::FNEG, SL, VT,
11070                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11071                                                                VT, N1020)),
11072                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11073                                                    N1021),
11074                                        N0, Flags), Flags);
11075       }
11076     }
11077   }
11078
11079   return SDValue();
11080 }
11081
11082 /// Try to perform FMA combining on a given FMUL node based on the distributive
11083 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11084 /// subtraction instead of addition).
11085 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11086   SDValue N0 = N->getOperand(0);
11087   SDValue N1 = N->getOperand(1);
11088   EVT VT = N->getValueType(0);
11089   SDLoc SL(N);
11090   const SDNodeFlags Flags = N->getFlags();
11091
11092   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11093
11094   const TargetOptions &Options = DAG.getTarget().Options;
11095
11096   // The transforms below are incorrect when x == 0 and y == inf, because the
11097   // intermediate multiplication produces a nan.
11098   if (!Options.NoInfsFPMath)
11099     return SDValue();
11100
11101   // Floating-point multiply-add without intermediate rounding.
11102   bool HasFMA =
11103       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11104       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11105       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11106
11107   // Floating-point multiply-add with intermediate rounding. This can result
11108   // in a less precise result due to the changed rounding order.
11109   bool HasFMAD = Options.UnsafeFPMath &&
11110                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11111
11112   // No valid opcode, do not combine.
11113   if (!HasFMAD && !HasFMA)
11114     return SDValue();
11115
11116   // Always prefer FMAD to FMA for precision.
11117   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11118   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11119
11120   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11121   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11122   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11123     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11124       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11125         if (C->isExactlyValue(+1.0))
11126           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11127                              Y, Flags);
11128         if (C->isExactlyValue(-1.0))
11129           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11130                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11131       }
11132     }
11133     return SDValue();
11134   };
11135
11136   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11137     return FMA;
11138   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11139     return FMA;
11140
11141   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11142   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11143   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11144   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11145   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11146     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11147       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11148         if (C0->isExactlyValue(+1.0))
11149           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11150                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11151                              Y, Flags);
11152         if (C0->isExactlyValue(-1.0))
11153           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11154                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11155                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11156       }
11157       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11158         if (C1->isExactlyValue(+1.0))
11159           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11160                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11161         if (C1->isExactlyValue(-1.0))
11162           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11163                              Y, Flags);
11164       }
11165     }
11166     return SDValue();
11167   };
11168
11169   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11170     return FMA;
11171   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11172     return FMA;
11173
11174   return SDValue();
11175 }
11176
11177 SDValue DAGCombiner::visitFADD(SDNode *N) {
11178   SDValue N0 = N->getOperand(0);
11179   SDValue N1 = N->getOperand(1);
11180   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11181   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11182   EVT VT = N->getValueType(0);
11183   SDLoc DL(N);
11184   const TargetOptions &Options = DAG.getTarget().Options;
11185   const SDNodeFlags Flags = N->getFlags();
11186
11187   // fold vector ops
11188   if (VT.isVector())
11189     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11190       return FoldedVOp;
11191
11192   // fold (fadd c1, c2) -> c1 + c2
11193   if (N0CFP && N1CFP)
11194     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11195
11196   // canonicalize constant to RHS
11197   if (N0CFP && !N1CFP)
11198     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11199
11200   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11201   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11202   if (N1C && N1C->isZero())
11203     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11204       return N0;
11205
11206   if (SDValue NewSel = foldBinOpIntoSelect(N))
11207     return NewSel;
11208
11209   // fold (fadd A, (fneg B)) -> (fsub A, B)
11210   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11211       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
11212     return DAG.getNode(ISD::FSUB, DL, VT, N0,
11213                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11214
11215   // fold (fadd (fneg A), B) -> (fsub B, A)
11216   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11217       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
11218     return DAG.getNode(ISD::FSUB, DL, VT, N1,
11219                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
11220
11221   auto isFMulNegTwo = [](SDValue FMul) {
11222     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11223       return false;
11224     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11225     return C && C->isExactlyValue(-2.0);
11226   };
11227
11228   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11229   if (isFMulNegTwo(N0)) {
11230     SDValue B = N0.getOperand(0);
11231     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11232     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11233   }
11234   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11235   if (isFMulNegTwo(N1)) {
11236     SDValue B = N1.getOperand(0);
11237     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11238     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11239   }
11240
11241   // No FP constant should be created after legalization as Instruction
11242   // Selection pass has a hard time dealing with FP constants.
11243   bool AllowNewConst = (Level < AfterLegalizeDAG);
11244
11245   // If 'unsafe math' or nnan is enabled, fold lots of things.
11246   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11247     // If allowed, fold (fadd (fneg x), x) -> 0.0
11248     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11249       return DAG.getConstantFP(0.0, DL, VT);
11250
11251     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11252     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11253       return DAG.getConstantFP(0.0, DL, VT);
11254   }
11255
11256   // If 'unsafe math' or reassoc and nsz, fold lots of things.
11257   // TODO: break out portions of the transformations below for which Unsafe is
11258   //       considered and which do not require both nsz and reassoc
11259   if ((Options.UnsafeFPMath ||
11260        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11261       AllowNewConst) {
11262     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11263     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11264         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11265       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11266       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11267     }
11268
11269     // We can fold chains of FADD's of the same value into multiplications.
11270     // This transform is not safe in general because we are reducing the number
11271     // of rounding steps.
11272     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11273       if (N0.getOpcode() == ISD::FMUL) {
11274         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11275         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11276
11277         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11278         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11279           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11280                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11281           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11282         }
11283
11284         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11285         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11286             N1.getOperand(0) == N1.getOperand(1) &&
11287             N0.getOperand(0) == N1.getOperand(0)) {
11288           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11289                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11290           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11291         }
11292       }
11293
11294       if (N1.getOpcode() == ISD::FMUL) {
11295         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11296         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11297
11298         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11299         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11300           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11301                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11302           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11303         }
11304
11305         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11306         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11307             N0.getOperand(0) == N0.getOperand(1) &&
11308             N1.getOperand(0) == N0.getOperand(0)) {
11309           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11310                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11311           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11312         }
11313       }
11314
11315       if (N0.getOpcode() == ISD::FADD) {
11316         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11317         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11318         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11319             (N0.getOperand(0) == N1)) {
11320           return DAG.getNode(ISD::FMUL, DL, VT,
11321                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11322         }
11323       }
11324
11325       if (N1.getOpcode() == ISD::FADD) {
11326         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11327         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11328         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11329             N1.getOperand(0) == N0) {
11330           return DAG.getNode(ISD::FMUL, DL, VT,
11331                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11332         }
11333       }
11334
11335       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11336       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11337           N0.getOperand(0) == N0.getOperand(1) &&
11338           N1.getOperand(0) == N1.getOperand(1) &&
11339           N0.getOperand(0) == N1.getOperand(0)) {
11340         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11341                            DAG.getConstantFP(4.0, DL, VT), Flags);
11342       }
11343     }
11344   } // enable-unsafe-fp-math
11345
11346   // FADD -> FMA combines:
11347   if (SDValue Fused = visitFADDForFMACombine(N)) {
11348     AddToWorklist(Fused.getNode());
11349     return Fused;
11350   }
11351   return SDValue();
11352 }
11353
11354 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11355   SDValue N0 = N->getOperand(0);
11356   SDValue N1 = N->getOperand(1);
11357   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11358   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11359   EVT VT = N->getValueType(0);
11360   SDLoc DL(N);
11361   const TargetOptions &Options = DAG.getTarget().Options;
11362   const SDNodeFlags Flags = N->getFlags();
11363
11364   // fold vector ops
11365   if (VT.isVector())
11366     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11367       return FoldedVOp;
11368
11369   // fold (fsub c1, c2) -> c1-c2
11370   if (N0CFP && N1CFP)
11371     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11372
11373   if (SDValue NewSel = foldBinOpIntoSelect(N))
11374     return NewSel;
11375
11376   // (fsub A, 0) -> A
11377   if (N1CFP && N1CFP->isZero()) {
11378     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11379         Flags.hasNoSignedZeros()) {
11380       return N0;
11381     }
11382   }
11383
11384   if (N0 == N1) {
11385     // (fsub x, x) -> 0.0
11386     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11387       return DAG.getConstantFP(0.0f, DL, VT);
11388   }
11389
11390   // (fsub -0.0, N1) -> -N1
11391   if (N0CFP && N0CFP->isZero()) {
11392     if (N0CFP->isNegative() ||
11393         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11394       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11395         return GetNegatedExpression(N1, DAG, LegalOperations);
11396       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11397         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11398     }
11399   }
11400
11401   if ((Options.UnsafeFPMath ||
11402       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11403       && N1.getOpcode() == ISD::FADD) {
11404     // X - (X + Y) -> -Y
11405     if (N0 == N1->getOperand(0))
11406       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11407     // X - (Y + X) -> -Y
11408     if (N0 == N1->getOperand(1))
11409       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11410   }
11411
11412   // fold (fsub A, (fneg B)) -> (fadd A, B)
11413   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11414     return DAG.getNode(ISD::FADD, DL, VT, N0,
11415                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11416
11417   // FSUB -> FMA combines:
11418   if (SDValue Fused = visitFSUBForFMACombine(N)) {
11419     AddToWorklist(Fused.getNode());
11420     return Fused;
11421   }
11422
11423   return SDValue();
11424 }
11425
11426 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11427   SDValue N0 = N->getOperand(0);
11428   SDValue N1 = N->getOperand(1);
11429   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11430   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11431   EVT VT = N->getValueType(0);
11432   SDLoc DL(N);
11433   const TargetOptions &Options = DAG.getTarget().Options;
11434   const SDNodeFlags Flags = N->getFlags();
11435
11436   // fold vector ops
11437   if (VT.isVector()) {
11438     // This just handles C1 * C2 for vectors. Other vector folds are below.
11439     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11440       return FoldedVOp;
11441   }
11442
11443   // fold (fmul c1, c2) -> c1*c2
11444   if (N0CFP && N1CFP)
11445     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11446
11447   // canonicalize constant to RHS
11448   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11449      !isConstantFPBuildVectorOrConstantFP(N1))
11450     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11451
11452   // fold (fmul A, 1.0) -> A
11453   if (N1CFP && N1CFP->isExactlyValue(1.0))
11454     return N0;
11455
11456   if (SDValue NewSel = foldBinOpIntoSelect(N))
11457     return NewSel;
11458
11459   if (Options.UnsafeFPMath ||
11460       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11461     // fold (fmul A, 0) -> 0
11462     if (N1CFP && N1CFP->isZero())
11463       return N1;
11464   }
11465
11466   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11467     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11468     if (isConstantFPBuildVectorOrConstantFP(N1) &&
11469         N0.getOpcode() == ISD::FMUL) {
11470       SDValue N00 = N0.getOperand(0);
11471       SDValue N01 = N0.getOperand(1);
11472       // Avoid an infinite loop by making sure that N00 is not a constant
11473       // (the inner multiply has not been constant folded yet).
11474       if (isConstantFPBuildVectorOrConstantFP(N01) &&
11475           !isConstantFPBuildVectorOrConstantFP(N00)) {
11476         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11477         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11478       }
11479     }
11480
11481     // Match a special-case: we convert X * 2.0 into fadd.
11482     // fmul (fadd X, X), C -> fmul X, 2.0 * C
11483     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11484         N0.getOperand(0) == N0.getOperand(1)) {
11485       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11486       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11487       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11488     }
11489   }
11490
11491   // fold (fmul X, 2.0) -> (fadd X, X)
11492   if (N1CFP && N1CFP->isExactlyValue(+2.0))
11493     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11494
11495   // fold (fmul X, -1.0) -> (fneg X)
11496   if (N1CFP && N1CFP->isExactlyValue(-1.0))
11497     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11498       return DAG.getNode(ISD::FNEG, DL, VT, N0);
11499
11500   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11501   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11502     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11503       // Both can be negated for free, check to see if at least one is cheaper
11504       // negated.
11505       if (LHSNeg == 2 || RHSNeg == 2)
11506         return DAG.getNode(ISD::FMUL, DL, VT,
11507                            GetNegatedExpression(N0, DAG, LegalOperations),
11508                            GetNegatedExpression(N1, DAG, LegalOperations),
11509                            Flags);
11510     }
11511   }
11512
11513   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11514   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11515   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11516       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11517       TLI.isOperationLegal(ISD::FABS, VT)) {
11518     SDValue Select = N0, X = N1;
11519     if (Select.getOpcode() != ISD::SELECT)
11520       std::swap(Select, X);
11521
11522     SDValue Cond = Select.getOperand(0);
11523     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11524     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11525
11526     if (TrueOpnd && FalseOpnd &&
11527         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11528         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11529         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11530       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11531       switch (CC) {
11532       default: break;
11533       case ISD::SETOLT:
11534       case ISD::SETULT:
11535       case ISD::SETOLE:
11536       case ISD::SETULE:
11537       case ISD::SETLT:
11538       case ISD::SETLE:
11539         std::swap(TrueOpnd, FalseOpnd);
11540         LLVM_FALLTHROUGH;
11541       case ISD::SETOGT:
11542       case ISD::SETUGT:
11543       case ISD::SETOGE:
11544       case ISD::SETUGE:
11545       case ISD::SETGT:
11546       case ISD::SETGE:
11547         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11548             TLI.isOperationLegal(ISD::FNEG, VT))
11549           return DAG.getNode(ISD::FNEG, DL, VT,
11550                    DAG.getNode(ISD::FABS, DL, VT, X));
11551         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11552           return DAG.getNode(ISD::FABS, DL, VT, X);
11553
11554         break;
11555       }
11556     }
11557   }
11558
11559   // FMUL -> FMA combines:
11560   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11561     AddToWorklist(Fused.getNode());
11562     return Fused;
11563   }
11564
11565   return SDValue();
11566 }
11567
11568 SDValue DAGCombiner::visitFMA(SDNode *N) {
11569   SDValue N0 = N->getOperand(0);
11570   SDValue N1 = N->getOperand(1);
11571   SDValue N2 = N->getOperand(2);
11572   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11573   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11574   EVT VT = N->getValueType(0);
11575   SDLoc DL(N);
11576   const TargetOptions &Options = DAG.getTarget().Options;
11577
11578   // FMA nodes have flags that propagate to the created nodes.
11579   const SDNodeFlags Flags = N->getFlags();
11580   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11581
11582   // Constant fold FMA.
11583   if (isa<ConstantFPSDNode>(N0) &&
11584       isa<ConstantFPSDNode>(N1) &&
11585       isa<ConstantFPSDNode>(N2)) {
11586     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11587   }
11588
11589   if (UnsafeFPMath) {
11590     if (N0CFP && N0CFP->isZero())
11591       return N2;
11592     if (N1CFP && N1CFP->isZero())
11593       return N2;
11594   }
11595   // TODO: The FMA node should have flags that propagate to these nodes.
11596   if (N0CFP && N0CFP->isExactlyValue(1.0))
11597     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11598   if (N1CFP && N1CFP->isExactlyValue(1.0))
11599     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11600
11601   // Canonicalize (fma c, x, y) -> (fma x, c, y)
11602   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11603      !isConstantFPBuildVectorOrConstantFP(N1))
11604     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11605
11606   if (UnsafeFPMath) {
11607     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11608     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11609         isConstantFPBuildVectorOrConstantFP(N1) &&
11610         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11611       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11612                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11613                                      Flags), Flags);
11614     }
11615
11616     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11617     if (N0.getOpcode() == ISD::FMUL &&
11618         isConstantFPBuildVectorOrConstantFP(N1) &&
11619         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11620       return DAG.getNode(ISD::FMA, DL, VT,
11621                          N0.getOperand(0),
11622                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11623                                      Flags),
11624                          N2);
11625     }
11626   }
11627
11628   // (fma x, 1, y) -> (fadd x, y)
11629   // (fma x, -1, y) -> (fadd (fneg x), y)
11630   if (N1CFP) {
11631     if (N1CFP->isExactlyValue(1.0))
11632       // TODO: The FMA node should have flags that propagate to this node.
11633       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11634
11635     if (N1CFP->isExactlyValue(-1.0) &&
11636         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11637       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11638       AddToWorklist(RHSNeg.getNode());
11639       // TODO: The FMA node should have flags that propagate to this node.
11640       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11641     }
11642
11643     // fma (fneg x), K, y -> fma x -K, y
11644     if (N0.getOpcode() == ISD::FNEG &&
11645         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11646          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11647       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11648                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11649     }
11650   }
11651
11652   if (UnsafeFPMath) {
11653     // (fma x, c, x) -> (fmul x, (c+1))
11654     if (N1CFP && N0 == N2) {
11655       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11656                          DAG.getNode(ISD::FADD, DL, VT, N1,
11657                                      DAG.getConstantFP(1.0, DL, VT), Flags),
11658                          Flags);
11659     }
11660
11661     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11662     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11663       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11664                          DAG.getNode(ISD::FADD, DL, VT, N1,
11665                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
11666                          Flags);
11667     }
11668   }
11669
11670   return SDValue();
11671 }
11672
11673 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11674 // reciprocal.
11675 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11676 // Notice that this is not always beneficial. One reason is different targets
11677 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11678 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11679 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11680 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11681   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11682   const SDNodeFlags Flags = N->getFlags();
11683   if (!UnsafeMath && !Flags.hasAllowReciprocal())
11684     return SDValue();
11685
11686   // Skip if current node is a reciprocal.
11687   SDValue N0 = N->getOperand(0);
11688   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11689   if (N0CFP && N0CFP->isExactlyValue(1.0))
11690     return SDValue();
11691
11692   // Exit early if the target does not want this transform or if there can't
11693   // possibly be enough uses of the divisor to make the transform worthwhile.
11694   SDValue N1 = N->getOperand(1);
11695   unsigned MinUses = TLI.combineRepeatedFPDivisors();
11696   if (!MinUses || N1->use_size() < MinUses)
11697     return SDValue();
11698
11699   // Find all FDIV users of the same divisor.
11700   // Use a set because duplicates may be present in the user list.
11701   SetVector<SDNode *> Users;
11702   for (auto *U : N1->uses()) {
11703     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11704       // This division is eligible for optimization only if global unsafe math
11705       // is enabled or if this division allows reciprocal formation.
11706       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11707         Users.insert(U);
11708     }
11709   }
11710
11711   // Now that we have the actual number of divisor uses, make sure it meets
11712   // the minimum threshold specified by the target.
11713   if (Users.size() < MinUses)
11714     return SDValue();
11715
11716   EVT VT = N->getValueType(0);
11717   SDLoc DL(N);
11718   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11719   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11720
11721   // Dividend / Divisor -> Dividend * Reciprocal
11722   for (auto *U : Users) {
11723     SDValue Dividend = U->getOperand(0);
11724     if (Dividend != FPOne) {
11725       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11726                                     Reciprocal, Flags);
11727       CombineTo(U, NewNode);
11728     } else if (U != Reciprocal.getNode()) {
11729       // In the absence of fast-math-flags, this user node is always the
11730       // same node as Reciprocal, but with FMF they may be different nodes.
11731       CombineTo(U, Reciprocal);
11732     }
11733   }
11734   return SDValue(N, 0);  // N was replaced.
11735 }
11736
11737 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11738   SDValue N0 = N->getOperand(0);
11739   SDValue N1 = N->getOperand(1);
11740   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11741   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11742   EVT VT = N->getValueType(0);
11743   SDLoc DL(N);
11744   const TargetOptions &Options = DAG.getTarget().Options;
11745   SDNodeFlags Flags = N->getFlags();
11746
11747   // fold vector ops
11748   if (VT.isVector())
11749     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11750       return FoldedVOp;
11751
11752   // fold (fdiv c1, c2) -> c1/c2
11753   if (N0CFP && N1CFP)
11754     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11755
11756   if (SDValue NewSel = foldBinOpIntoSelect(N))
11757     return NewSel;
11758
11759   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11760     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11761     if (N1CFP) {
11762       // Compute the reciprocal 1.0 / c2.
11763       const APFloat &N1APF = N1CFP->getValueAPF();
11764       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11765       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11766       // Only do the transform if the reciprocal is a legal fp immediate that
11767       // isn't too nasty (eg NaN, denormal, ...).
11768       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11769           (!LegalOperations ||
11770            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11771            // backend)... we should handle this gracefully after Legalize.
11772            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11773            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11774            TLI.isFPImmLegal(Recip, VT)))
11775         return DAG.getNode(ISD::FMUL, DL, VT, N0,
11776                            DAG.getConstantFP(Recip, DL, VT), Flags);
11777     }
11778
11779     // If this FDIV is part of a reciprocal square root, it may be folded
11780     // into a target-specific square root estimate instruction.
11781     if (N1.getOpcode() == ISD::FSQRT) {
11782       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11783         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11784       }
11785     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11786                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11787       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11788                                           Flags)) {
11789         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11790         AddToWorklist(RV.getNode());
11791         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11792       }
11793     } else if (N1.getOpcode() == ISD::FP_ROUND &&
11794                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11795       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11796                                           Flags)) {
11797         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11798         AddToWorklist(RV.getNode());
11799         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11800       }
11801     } else if (N1.getOpcode() == ISD::FMUL) {
11802       // Look through an FMUL. Even though this won't remove the FDIV directly,
11803       // it's still worthwhile to get rid of the FSQRT if possible.
11804       SDValue SqrtOp;
11805       SDValue OtherOp;
11806       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11807         SqrtOp = N1.getOperand(0);
11808         OtherOp = N1.getOperand(1);
11809       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11810         SqrtOp = N1.getOperand(1);
11811         OtherOp = N1.getOperand(0);
11812       }
11813       if (SqrtOp.getNode()) {
11814         // We found a FSQRT, so try to make this fold:
11815         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11816         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11817           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11818           AddToWorklist(RV.getNode());
11819           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11820         }
11821       }
11822     }
11823
11824     // Fold into a reciprocal estimate and multiply instead of a real divide.
11825     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11826       AddToWorklist(RV.getNode());
11827       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11828     }
11829   }
11830
11831   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11832   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11833     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11834       // Both can be negated for free, check to see if at least one is cheaper
11835       // negated.
11836       if (LHSNeg == 2 || RHSNeg == 2)
11837         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11838                            GetNegatedExpression(N0, DAG, LegalOperations),
11839                            GetNegatedExpression(N1, DAG, LegalOperations),
11840                            Flags);
11841     }
11842   }
11843
11844   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11845     return CombineRepeatedDivisors;
11846
11847   return SDValue();
11848 }
11849
11850 SDValue DAGCombiner::visitFREM(SDNode *N) {
11851   SDValue N0 = N->getOperand(0);
11852   SDValue N1 = N->getOperand(1);
11853   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11854   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11855   EVT VT = N->getValueType(0);
11856
11857   // fold (frem c1, c2) -> fmod(c1,c2)
11858   if (N0CFP && N1CFP)
11859     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11860
11861   if (SDValue NewSel = foldBinOpIntoSelect(N))
11862     return NewSel;
11863
11864   return SDValue();
11865 }
11866
11867 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11868   SDNodeFlags Flags = N->getFlags();
11869   if (!DAG.getTarget().Options.UnsafeFPMath &&
11870       !Flags.hasApproximateFuncs())
11871     return SDValue();
11872
11873   SDValue N0 = N->getOperand(0);
11874   if (TLI.isFsqrtCheap(N0, DAG))
11875     return SDValue();
11876
11877   // FSQRT nodes have flags that propagate to the created nodes.
11878   return buildSqrtEstimate(N0, Flags);
11879 }
11880
11881 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11882 /// copysign(x, fp_round(y)) -> copysign(x, y)
11883 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11884   SDValue N1 = N->getOperand(1);
11885   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11886        N1.getOpcode() == ISD::FP_ROUND)) {
11887     // Do not optimize out type conversion of f128 type yet.
11888     // For some targets like x86_64, configuration is changed to keep one f128
11889     // value in one SSE register, but instruction selection cannot handle
11890     // FCOPYSIGN on SSE registers yet.
11891     EVT N1VT = N1->getValueType(0);
11892     EVT N1Op0VT = N1->getOperand(0).getValueType();
11893     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11894   }
11895   return false;
11896 }
11897
11898 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11899   SDValue N0 = N->getOperand(0);
11900   SDValue N1 = N->getOperand(1);
11901   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11902   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11903   EVT VT = N->getValueType(0);
11904
11905   if (N0CFP && N1CFP) // Constant fold
11906     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11907
11908   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
11909     const APFloat &V = N1C->getValueAPF();
11910     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11911     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11912     if (!V.isNegative()) {
11913       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11914         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11915     } else {
11916       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11917         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11918                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11919     }
11920   }
11921
11922   // copysign(fabs(x), y) -> copysign(x, y)
11923   // copysign(fneg(x), y) -> copysign(x, y)
11924   // copysign(copysign(x,z), y) -> copysign(x, y)
11925   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11926       N0.getOpcode() == ISD::FCOPYSIGN)
11927     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11928
11929   // copysign(x, abs(y)) -> abs(x)
11930   if (N1.getOpcode() == ISD::FABS)
11931     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11932
11933   // copysign(x, copysign(y,z)) -> copysign(x, z)
11934   if (N1.getOpcode() == ISD::FCOPYSIGN)
11935     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11936
11937   // copysign(x, fp_extend(y)) -> copysign(x, y)
11938   // copysign(x, fp_round(y)) -> copysign(x, y)
11939   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11940     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11941
11942   return SDValue();
11943 }
11944
11945 SDValue DAGCombiner::visitFPOW(SDNode *N) {
11946   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
11947   if (!ExponentC)
11948     return SDValue();
11949
11950   // Try to convert x ** (1/3) into cube root.
11951   // TODO: Handle the various flavors of long double.
11952   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
11953   //       Some range near 1/3 should be fine.
11954   EVT VT = N->getValueType(0);
11955   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
11956       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
11957     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
11958     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
11959     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
11960     // For regular numbers, rounding may cause the results to differ.
11961     // Therefore, we require { nsz ninf nnan afn } for this transform.
11962     // TODO: We could select out the special cases if we don't have nsz/ninf.
11963     SDNodeFlags Flags = N->getFlags();
11964     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
11965         !Flags.hasApproximateFuncs())
11966       return SDValue();
11967
11968     // Do not create a cbrt() libcall if the target does not have it, and do not
11969     // turn a pow that has lowering support into a cbrt() libcall.
11970     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
11971         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
11972          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
11973       return SDValue();
11974
11975     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
11976   }
11977
11978   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
11979   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
11980   // TODO: This could be extended (using a target hook) to handle smaller
11981   // power-of-2 fractional exponents.
11982   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
11983   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
11984   if (ExponentIs025 || ExponentIs075) {
11985     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
11986     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
11987     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
11988     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
11989     // For regular numbers, rounding may cause the results to differ.
11990     // Therefore, we require { nsz ninf afn } for this transform.
11991     // TODO: We could select out the special cases if we don't have nsz/ninf.
11992     SDNodeFlags Flags = N->getFlags();
11993
11994     // We only need no signed zeros for the 0.25 case.
11995     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
11996         !Flags.hasApproximateFuncs())
11997       return SDValue();
11998
11999     // Don't double the number of libcalls. We are trying to inline fast code.
12000     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12001       return SDValue();
12002
12003     // Assume that libcalls are the smallest code.
12004     // TODO: This restriction should probably be lifted for vectors.
12005     if (DAG.getMachineFunction().getFunction().optForSize())
12006       return SDValue();
12007
12008     // pow(X, 0.25) --> sqrt(sqrt(X))
12009     SDLoc DL(N);
12010     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12011     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12012     if (ExponentIs025)
12013       return SqrtSqrt;
12014     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12015     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12016   }
12017
12018   return SDValue();
12019 }
12020
12021 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12022                                const TargetLowering &TLI) {
12023   // This optimization is guarded by a function attribute because it may produce
12024   // unexpected results. Ie, programs may be relying on the platform-specific
12025   // undefined behavior when the float-to-int conversion overflows.
12026   const Function &F = DAG.getMachineFunction().getFunction();
12027   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12028   if (StrictOverflow.getValueAsString().equals("false"))
12029     return SDValue();
12030
12031   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12032   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12033   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12034   // conversions would return +0.0.
12035   // FIXME: We should be able to use node-level FMF here.
12036   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12037   EVT VT = N->getValueType(0);
12038   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12039       !DAG.getTarget().Options.NoSignedZerosFPMath)
12040     return SDValue();
12041
12042   // fptosi/fptoui round towards zero, so converting from FP to integer and
12043   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12044   SDValue N0 = N->getOperand(0);
12045   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12046       N0.getOperand(0).getValueType() == VT)
12047     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12048
12049   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12050       N0.getOperand(0).getValueType() == VT)
12051     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12052
12053   return SDValue();
12054 }
12055
12056 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12057   SDValue N0 = N->getOperand(0);
12058   EVT VT = N->getValueType(0);
12059   EVT OpVT = N0.getValueType();
12060
12061   // fold (sint_to_fp c1) -> c1fp
12062   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12063       // ...but only if the target supports immediate floating-point values
12064       (!LegalOperations ||
12065        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12066     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12067
12068   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12069   // but UINT_TO_FP is legal on this target, try to convert.
12070   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12071       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12072     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12073     if (DAG.SignBitIsZero(N0))
12074       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12075   }
12076
12077   // The next optimizations are desirable only if SELECT_CC can be lowered.
12078   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12079     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12080     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12081         !VT.isVector() &&
12082         (!LegalOperations ||
12083          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12084       SDLoc DL(N);
12085       SDValue Ops[] =
12086         { N0.getOperand(0), N0.getOperand(1),
12087           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12088           N0.getOperand(2) };
12089       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12090     }
12091
12092     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12093     //      (select_cc x, y, 1.0, 0.0,, cc)
12094     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12095         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12096         (!LegalOperations ||
12097          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12098       SDLoc DL(N);
12099       SDValue Ops[] =
12100         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12101           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12102           N0.getOperand(0).getOperand(2) };
12103       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12104     }
12105   }
12106
12107   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12108     return FTrunc;
12109
12110   return SDValue();
12111 }
12112
12113 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12114   SDValue N0 = N->getOperand(0);
12115   EVT VT = N->getValueType(0);
12116   EVT OpVT = N0.getValueType();
12117
12118   // fold (uint_to_fp c1) -> c1fp
12119   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12120       // ...but only if the target supports immediate floating-point values
12121       (!LegalOperations ||
12122        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12123     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12124
12125   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12126   // but SINT_TO_FP is legal on this target, try to convert.
12127   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12128       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12129     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12130     if (DAG.SignBitIsZero(N0))
12131       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12132   }
12133
12134   // The next optimizations are desirable only if SELECT_CC can be lowered.
12135   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12136     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12137     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12138         (!LegalOperations ||
12139          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12140       SDLoc DL(N);
12141       SDValue Ops[] =
12142         { N0.getOperand(0), N0.getOperand(1),
12143           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12144           N0.getOperand(2) };
12145       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12146     }
12147   }
12148
12149   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12150     return FTrunc;
12151
12152   return SDValue();
12153 }
12154
12155 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12156 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12157   SDValue N0 = N->getOperand(0);
12158   EVT VT = N->getValueType(0);
12159
12160   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12161     return SDValue();
12162
12163   SDValue Src = N0.getOperand(0);
12164   EVT SrcVT = Src.getValueType();
12165   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12166   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12167
12168   // We can safely assume the conversion won't overflow the output range,
12169   // because (for example) (uint8_t)18293.f is undefined behavior.
12170
12171   // Since we can assume the conversion won't overflow, our decision as to
12172   // whether the input will fit in the float should depend on the minimum
12173   // of the input range and output range.
12174
12175   // This means this is also safe for a signed input and unsigned output, since
12176   // a negative input would lead to undefined behavior.
12177   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12178   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12179   unsigned ActualSize = std::min(InputSize, OutputSize);
12180   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12181
12182   // We can only fold away the float conversion if the input range can be
12183   // represented exactly in the float range.
12184   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12185     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12186       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12187                                                        : ISD::ZERO_EXTEND;
12188       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12189     }
12190     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12191       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12192     return DAG.getBitcast(VT, Src);
12193   }
12194   return SDValue();
12195 }
12196
12197 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12198   SDValue N0 = N->getOperand(0);
12199   EVT VT = N->getValueType(0);
12200
12201   // fold (fp_to_sint c1fp) -> c1
12202   if (isConstantFPBuildVectorOrConstantFP(N0))
12203     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12204
12205   return FoldIntToFPToInt(N, DAG);
12206 }
12207
12208 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12209   SDValue N0 = N->getOperand(0);
12210   EVT VT = N->getValueType(0);
12211
12212   // fold (fp_to_uint c1fp) -> c1
12213   if (isConstantFPBuildVectorOrConstantFP(N0))
12214     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12215
12216   return FoldIntToFPToInt(N, DAG);
12217 }
12218
12219 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12220   SDValue N0 = N->getOperand(0);
12221   SDValue N1 = N->getOperand(1);
12222   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12223   EVT VT = N->getValueType(0);
12224
12225   // fold (fp_round c1fp) -> c1fp
12226   if (N0CFP)
12227     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12228
12229   // fold (fp_round (fp_extend x)) -> x
12230   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12231     return N0.getOperand(0);
12232
12233   // fold (fp_round (fp_round x)) -> (fp_round x)
12234   if (N0.getOpcode() == ISD::FP_ROUND) {
12235     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12236     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12237
12238     // Skip this folding if it results in an fp_round from f80 to f16.
12239     //
12240     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12241     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12242     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12243     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12244     // x86.
12245     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12246       return SDValue();
12247
12248     // If the first fp_round isn't a value preserving truncation, it might
12249     // introduce a tie in the second fp_round, that wouldn't occur in the
12250     // single-step fp_round we want to fold to.
12251     // In other words, double rounding isn't the same as rounding.
12252     // Also, this is a value preserving truncation iff both fp_round's are.
12253     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12254       SDLoc DL(N);
12255       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12256                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12257     }
12258   }
12259
12260   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12261   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12262     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12263                               N0.getOperand(0), N1);
12264     AddToWorklist(Tmp.getNode());
12265     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12266                        Tmp, N0.getOperand(1));
12267   }
12268
12269   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12270     return NewVSel;
12271
12272   return SDValue();
12273 }
12274
12275 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12276   SDValue N0 = N->getOperand(0);
12277   EVT VT = N->getValueType(0);
12278   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12279   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12280
12281   // fold (fp_round_inreg c1fp) -> c1fp
12282   if (N0CFP && isTypeLegal(EVT)) {
12283     SDLoc DL(N);
12284     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12285     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12286   }
12287
12288   return SDValue();
12289 }
12290
12291 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12292   SDValue N0 = N->getOperand(0);
12293   EVT VT = N->getValueType(0);
12294
12295   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
12296   if (N->hasOneUse() &&
12297       N->use_begin()->getOpcode() == ISD::FP_ROUND)
12298     return SDValue();
12299
12300   // fold (fp_extend c1fp) -> c1fp
12301   if (isConstantFPBuildVectorOrConstantFP(N0))
12302     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12303
12304   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12305   if (N0.getOpcode() == ISD::FP16_TO_FP &&
12306       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12307     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12308
12309   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
12310   // value of X.
12311   if (N0.getOpcode() == ISD::FP_ROUND
12312       && N0.getConstantOperandVal(1) == 1) {
12313     SDValue In = N0.getOperand(0);
12314     if (In.getValueType() == VT) return In;
12315     if (VT.bitsLT(In.getValueType()))
12316       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12317                          In, N0.getOperand(1));
12318     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12319   }
12320
12321   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12322   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12323        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12324     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12325     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12326                                      LN0->getChain(),
12327                                      LN0->getBasePtr(), N0.getValueType(),
12328                                      LN0->getMemOperand());
12329     CombineTo(N, ExtLoad);
12330     CombineTo(N0.getNode(),
12331               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12332                           N0.getValueType(), ExtLoad,
12333                           DAG.getIntPtrConstant(1, SDLoc(N0))),
12334               ExtLoad.getValue(1));
12335     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12336   }
12337
12338   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12339     return NewVSel;
12340
12341   return SDValue();
12342 }
12343
12344 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12345   SDValue N0 = N->getOperand(0);
12346   EVT VT = N->getValueType(0);
12347
12348   // fold (fceil c1) -> fceil(c1)
12349   if (isConstantFPBuildVectorOrConstantFP(N0))
12350     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12351
12352   return SDValue();
12353 }
12354
12355 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12356   SDValue N0 = N->getOperand(0);
12357   EVT VT = N->getValueType(0);
12358
12359   // fold (ftrunc c1) -> ftrunc(c1)
12360   if (isConstantFPBuildVectorOrConstantFP(N0))
12361     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12362
12363   // fold ftrunc (known rounded int x) -> x
12364   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12365   // likely to be generated to extract integer from a rounded floating value.
12366   switch (N0.getOpcode()) {
12367   default: break;
12368   case ISD::FRINT:
12369   case ISD::FTRUNC:
12370   case ISD::FNEARBYINT:
12371   case ISD::FFLOOR:
12372   case ISD::FCEIL:
12373     return N0;
12374   }
12375
12376   return SDValue();
12377 }
12378
12379 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12380   SDValue N0 = N->getOperand(0);
12381   EVT VT = N->getValueType(0);
12382
12383   // fold (ffloor c1) -> ffloor(c1)
12384   if (isConstantFPBuildVectorOrConstantFP(N0))
12385     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12386
12387   return SDValue();
12388 }
12389
12390 // FIXME: FNEG and FABS have a lot in common; refactor.
12391 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12392   SDValue N0 = N->getOperand(0);
12393   EVT VT = N->getValueType(0);
12394
12395   // Constant fold FNEG.
12396   if (isConstantFPBuildVectorOrConstantFP(N0))
12397     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12398
12399   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12400                          &DAG.getTarget().Options))
12401     return GetNegatedExpression(N0, DAG, LegalOperations);
12402
12403   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12404   // constant pool values.
12405   if (!TLI.isFNegFree(VT) &&
12406       N0.getOpcode() == ISD::BITCAST &&
12407       N0.getNode()->hasOneUse()) {
12408     SDValue Int = N0.getOperand(0);
12409     EVT IntVT = Int.getValueType();
12410     if (IntVT.isInteger() && !IntVT.isVector()) {
12411       APInt SignMask;
12412       if (N0.getValueType().isVector()) {
12413         // For a vector, get a mask such as 0x80... per scalar element
12414         // and splat it.
12415         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12416         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12417       } else {
12418         // For a scalar, just generate 0x80...
12419         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12420       }
12421       SDLoc DL0(N0);
12422       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12423                         DAG.getConstant(SignMask, DL0, IntVT));
12424       AddToWorklist(Int.getNode());
12425       return DAG.getBitcast(VT, Int);
12426     }
12427   }
12428
12429   // (fneg (fmul c, x)) -> (fmul -c, x)
12430   if (N0.getOpcode() == ISD::FMUL &&
12431       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12432     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12433     if (CFP1) {
12434       APFloat CVal = CFP1->getValueAPF();
12435       CVal.changeSign();
12436       if (Level >= AfterLegalizeDAG &&
12437           (TLI.isFPImmLegal(CVal, VT) ||
12438            TLI.isOperationLegal(ISD::ConstantFP, VT)))
12439         return DAG.getNode(
12440             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12441             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12442             N0->getFlags());
12443     }
12444   }
12445
12446   return SDValue();
12447 }
12448
12449 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12450                             APFloat (*Op)(const APFloat &, const APFloat &)) {
12451   SDValue N0 = N->getOperand(0);
12452   SDValue N1 = N->getOperand(1);
12453   EVT VT = N->getValueType(0);
12454   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12455   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12456
12457   if (N0CFP && N1CFP) {
12458     const APFloat &C0 = N0CFP->getValueAPF();
12459     const APFloat &C1 = N1CFP->getValueAPF();
12460     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12461   }
12462
12463   // Canonicalize to constant on RHS.
12464   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12465       !isConstantFPBuildVectorOrConstantFP(N1))
12466     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12467
12468   return SDValue();
12469 }
12470
12471 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12472   return visitFMinMax(DAG, N, minnum);
12473 }
12474
12475 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12476   return visitFMinMax(DAG, N, maxnum);
12477 }
12478
12479 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12480   return visitFMinMax(DAG, N, minimum);
12481 }
12482
12483 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12484   return visitFMinMax(DAG, N, maximum);
12485 }
12486
12487 SDValue DAGCombiner::visitFABS(SDNode *N) {
12488   SDValue N0 = N->getOperand(0);
12489   EVT VT = N->getValueType(0);
12490
12491   // fold (fabs c1) -> fabs(c1)
12492   if (isConstantFPBuildVectorOrConstantFP(N0))
12493     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12494
12495   // fold (fabs (fabs x)) -> (fabs x)
12496   if (N0.getOpcode() == ISD::FABS)
12497     return N->getOperand(0);
12498
12499   // fold (fabs (fneg x)) -> (fabs x)
12500   // fold (fabs (fcopysign x, y)) -> (fabs x)
12501   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12502     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12503
12504   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12505   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12506     SDValue Int = N0.getOperand(0);
12507     EVT IntVT = Int.getValueType();
12508     if (IntVT.isInteger() && !IntVT.isVector()) {
12509       APInt SignMask;
12510       if (N0.getValueType().isVector()) {
12511         // For a vector, get a mask such as 0x7f... per scalar element
12512         // and splat it.
12513         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12514         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12515       } else {
12516         // For a scalar, just generate 0x7f...
12517         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12518       }
12519       SDLoc DL(N0);
12520       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12521                         DAG.getConstant(SignMask, DL, IntVT));
12522       AddToWorklist(Int.getNode());
12523       return DAG.getBitcast(N->getValueType(0), Int);
12524     }
12525   }
12526
12527   return SDValue();
12528 }
12529
12530 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12531   SDValue Chain = N->getOperand(0);
12532   SDValue N1 = N->getOperand(1);
12533   SDValue N2 = N->getOperand(2);
12534
12535   // If N is a constant we could fold this into a fallthrough or unconditional
12536   // branch. However that doesn't happen very often in normal code, because
12537   // Instcombine/SimplifyCFG should have handled the available opportunities.
12538   // If we did this folding here, it would be necessary to update the
12539   // MachineBasicBlock CFG, which is awkward.
12540
12541   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12542   // on the target.
12543   if (N1.getOpcode() == ISD::SETCC &&
12544       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12545                                    N1.getOperand(0).getValueType())) {
12546     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12547                        Chain, N1.getOperand(2),
12548                        N1.getOperand(0), N1.getOperand(1), N2);
12549   }
12550
12551   if (N1.hasOneUse()) {
12552     if (SDValue NewN1 = rebuildSetCC(N1))
12553       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12554   }
12555
12556   return SDValue();
12557 }
12558
12559 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12560   if (N.getOpcode() == ISD::SRL ||
12561       (N.getOpcode() == ISD::TRUNCATE &&
12562        (N.getOperand(0).hasOneUse() &&
12563         N.getOperand(0).getOpcode() == ISD::SRL))) {
12564     // Look pass the truncate.
12565     if (N.getOpcode() == ISD::TRUNCATE)
12566       N = N.getOperand(0);
12567
12568     // Match this pattern so that we can generate simpler code:
12569     //
12570     //   %a = ...
12571     //   %b = and i32 %a, 2
12572     //   %c = srl i32 %b, 1
12573     //   brcond i32 %c ...
12574     //
12575     // into
12576     //
12577     //   %a = ...
12578     //   %b = and i32 %a, 2
12579     //   %c = setcc eq %b, 0
12580     //   brcond %c ...
12581     //
12582     // This applies only when the AND constant value has one bit set and the
12583     // SRL constant is equal to the log2 of the AND constant. The back-end is
12584     // smart enough to convert the result into a TEST/JMP sequence.
12585     SDValue Op0 = N.getOperand(0);
12586     SDValue Op1 = N.getOperand(1);
12587
12588     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12589       SDValue AndOp1 = Op0.getOperand(1);
12590
12591       if (AndOp1.getOpcode() == ISD::Constant) {
12592         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12593
12594         if (AndConst.isPowerOf2() &&
12595             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12596           SDLoc DL(N);
12597           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12598                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12599                               ISD::SETNE);
12600         }
12601       }
12602     }
12603   }
12604
12605   // Transform br(xor(x, y)) -> br(x != y)
12606   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12607   if (N.getOpcode() == ISD::XOR) {
12608     // Because we may call this on a speculatively constructed
12609     // SimplifiedSetCC Node, we need to simplify this node first.
12610     // Ideally this should be folded into SimplifySetCC and not
12611     // here. For now, grab a handle to N so we don't lose it from
12612     // replacements interal to the visit.
12613     HandleSDNode XORHandle(N);
12614     while (N.getOpcode() == ISD::XOR) {
12615       SDValue Tmp = visitXOR(N.getNode());
12616       // No simplification done.
12617       if (!Tmp.getNode())
12618         break;
12619       // Returning N is form in-visit replacement that may invalidated
12620       // N. Grab value from Handle.
12621       if (Tmp.getNode() == N.getNode())
12622         N = XORHandle.getValue();
12623       else // Node simplified. Try simplifying again.
12624         N = Tmp;
12625     }
12626
12627     if (N.getOpcode() != ISD::XOR)
12628       return N;
12629
12630     SDNode *TheXor = N.getNode();
12631
12632     SDValue Op0 = TheXor->getOperand(0);
12633     SDValue Op1 = TheXor->getOperand(1);
12634
12635     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12636       bool Equal = false;
12637       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12638           Op0.getOpcode() == ISD::XOR) {
12639         TheXor = Op0.getNode();
12640         Equal = true;
12641       }
12642
12643       EVT SetCCVT = N.getValueType();
12644       if (LegalTypes)
12645         SetCCVT = getSetCCResultType(SetCCVT);
12646       // Replace the uses of XOR with SETCC
12647       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12648                           Equal ? ISD::SETEQ : ISD::SETNE);
12649     }
12650   }
12651
12652   return SDValue();
12653 }
12654
12655 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12656 //
12657 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12658   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12659   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12660
12661   // If N is a constant we could fold this into a fallthrough or unconditional
12662   // branch. However that doesn't happen very often in normal code, because
12663   // Instcombine/SimplifyCFG should have handled the available opportunities.
12664   // If we did this folding here, it would be necessary to update the
12665   // MachineBasicBlock CFG, which is awkward.
12666
12667   // Use SimplifySetCC to simplify SETCC's.
12668   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12669                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12670                                false);
12671   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12672
12673   // fold to a simpler setcc
12674   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12675     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12676                        N->getOperand(0), Simp.getOperand(2),
12677                        Simp.getOperand(0), Simp.getOperand(1),
12678                        N->getOperand(4));
12679
12680   return SDValue();
12681 }
12682
12683 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12684 /// and that N may be folded in the load / store addressing mode.
12685 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12686                                     SelectionDAG &DAG,
12687                                     const TargetLowering &TLI) {
12688   EVT VT;
12689   unsigned AS;
12690
12691   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12692     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12693       return false;
12694     VT = LD->getMemoryVT();
12695     AS = LD->getAddressSpace();
12696   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12697     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12698       return false;
12699     VT = ST->getMemoryVT();
12700     AS = ST->getAddressSpace();
12701   } else
12702     return false;
12703
12704   TargetLowering::AddrMode AM;
12705   if (N->getOpcode() == ISD::ADD) {
12706     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12707     if (Offset)
12708       // [reg +/- imm]
12709       AM.BaseOffs = Offset->getSExtValue();
12710     else
12711       // [reg +/- reg]
12712       AM.Scale = 1;
12713   } else if (N->getOpcode() == ISD::SUB) {
12714     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12715     if (Offset)
12716       // [reg +/- imm]
12717       AM.BaseOffs = -Offset->getSExtValue();
12718     else
12719       // [reg +/- reg]
12720       AM.Scale = 1;
12721   } else
12722     return false;
12723
12724   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12725                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12726 }
12727
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \param N the load or store node to transform; any other node kind is
///          rejected.
/// \returns true if \p N was replaced by an indexed load/store (in which case
///          \p N and its old address node have been deleted), false if the
///          DAG was left unchanged.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only run once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N and make sure the target supports a pre-inc or pre-dec form
  // for its memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr/Offset are held in exchanged order for the
  // checks below; they are swapped back before the indexed node is built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // A RegisterSDNode base is rejected here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // The same Visited/Worklist pair is shared by every query below, all of
  // which are seeded from N.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses collects those add/sub users of BasePtr; it is cleared (giving
  // up on this extra rewrite, not on the whole transformation) as soon as one
  // user does not fit.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Skip users that hasPredecessorHelper reports against the worklist
      // seeded with N.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The operand of the add/sub that is not this use of BasePtr must be a
      // constant.
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the order the target returned before building the indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // All checks passed: build the indexed node from N.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Redirect N's results.  For a load, result 0 maps to result 0 and result 1
  // to result 2 of the indexed node; for a store, result 0 maps to result 1.
  // The remaining result (1 for loads, 0 for stores) is the updated base
  // value used further down.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Go back to the exchanged order that was in effect while OtherUses was
  // collected.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // Find which operand of the user is the constant offset; the other one
    // must be BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs of the offset (X) and pointer (Y) terms in the user expression
    // (index 0) and in the indexed load/store (index 1).
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // The sign of the t1 term selects between ADD and SUB.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
12959
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folds the add/subtract into the
/// new indexed load/store and all of its uses are redirected to the
/// new load/store.
///
/// \param N the load or store node to transform; any other node kind is
///          rejected.
/// \returns true if \p N was replaced by an indexed load/store (in which case
///          \p N and the folded add/sub have been deleted), false if the DAG
///          was left unchanged.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Only run once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Classify N and make sure the target supports a post-inc or post-dec form
  // for its memory type.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The pointer needs at least one user besides N to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look at every add/sub user of the pointer as a candidate to fold; the
  // first candidate that passes all checks is used.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index and register bases are not candidates.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // RealUse becomes true if some user of this add/sub cannot fold it
          // into its addressing mode.
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      // Marking it visited up front keeps the search from walking through it.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Redirect N's results.  For a load, result 0 maps to result 0 and
        // result 1 to result 2 of the indexed node; for a store, result 0
        // maps to result 1.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
13084
13085 /// Return the base-pointer arithmetic from an indexed \p LD.
13086 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13087   ISD::MemIndexedMode AM = LD->getAddressingMode();
13088   assert(AM != ISD::UNINDEXED);
13089   SDValue BP = LD->getOperand(1);
13090   SDValue Inc = LD->getOperand(2);
13091
13092   // Some backends use TargetConstants for load offsets, but don't expect
13093   // TargetConstants in general ADD nodes. We can convert these constants into
13094   // regular Constants (if the constant is not opaque).
13095   assert((Inc.getOpcode() != ISD::TargetConstant ||
13096           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13097          "Cannot split out indexing using opaque target constants");
13098   if (Inc.getOpcode() == ISD::TargetConstant) {
13099     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13100     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13101                           ConstInc->getValueType(0));
13102   }
13103
13104   unsigned Opc =
13105       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13106   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13107 }
13108
13109 static inline int numVectorEltsOrZero(EVT T) {
13110   return T.isVector() ? T.getVectorNumElements() : 0;
13111 }
13112
13113 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13114   Val = ST->getValue();
13115   EVT STType = Val.getValueType();
13116   EVT STMemType = ST->getMemoryVT();
13117   if (STType == STMemType)
13118     return true;
13119   if (isTypeLegal(STMemType))
13120     return false; // fail.
13121   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13122       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13123     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13124     return true;
13125   }
13126   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13127       STType.isInteger() && STMemType.isInteger()) {
13128     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13129     return true;
13130   }
13131   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13132     Val = DAG.getBitcast(STMemType, Val);
13133     return true;
13134   }
13135   return false; // fail.
13136 }
13137
13138 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13139   EVT LDMemType = LD->getMemoryVT();
13140   EVT LDType = LD->getValueType(0);
13141   assert(Val.getValueType() == LDMemType &&
13142          "Attempting to extend value of non-matching type");
13143   if (LDType == LDMemType)
13144     return true;
13145   if (LDMemType.isInteger() && LDType.isInteger()) {
13146     switch (LD->getExtensionType()) {
13147     case ISD::NON_EXTLOAD:
13148       Val = DAG.getBitcast(LDType, Val);
13149       return true;
13150     case ISD::EXTLOAD:
13151       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13152       return true;
13153     case ISD::SEXTLOAD:
13154       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13155       return true;
13156     case ISD::ZEXTLOAD:
13157       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13158       return true;
13159     }
13160   }
13161   return false;
13162 }
13163
13164 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13165   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13166     return SDValue();
13167   SDValue Chain = LD->getOperand(0);
13168   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13169   if (!ST || ST->isVolatile())
13170     return SDValue();
13171
13172   EVT LDType = LD->getValueType(0);
13173   EVT LDMemType = LD->getMemoryVT();
13174   EVT STMemType = ST->getMemoryVT();
13175   EVT STType = ST->getValue().getValueType();
13176
13177   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13178   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13179   int64_t Offset;
13180   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13181     return SDValue();
13182
13183   // Normalize for Endianness. After this Offset=0 will denote that the least
13184   // significant bit in the loaded value maps to the least significant bit in
13185   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
13186   // n:th least significant byte of the stored value.
13187   if (DAG.getDataLayout().isBigEndian())
13188     Offset = (STMemType.getStoreSizeInBits() -
13189               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13190
13191   // Check that the stored value cover all bits that are loaded.
13192   bool STCoversLD =
13193       (Offset >= 0) &&
13194       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13195
13196   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13197     if (LD->isIndexed()) {
13198       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13199                     LD->getAddressingMode() == ISD::POST_DEC);
13200       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13201       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13202                              LD->getOperand(1), LD->getOperand(2));
13203       SDValue Ops[] = {Val, Idx, Chain};
13204       return CombineTo(LD, Ops, 3);
13205     }
13206     return CombineTo(LD, Val, Chain);
13207   };
13208
13209   if (!STCoversLD)
13210     return SDValue();
13211
13212   // Memory as copy space (potentially masked).
13213   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13214     // Simple case: Direct non-truncating forwarding
13215     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13216       return ReplaceLd(LD, ST->getValue(), Chain);
13217     // Can we model the truncate and extension with an and mask?
13218     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13219         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13220       // Mask to size of LDMemType
13221       auto Mask =
13222           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13223                                                STMemType.getSizeInBits()),
13224                           SDLoc(ST), STType);
13225       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13226       return ReplaceLd(LD, Val, Chain);
13227     }
13228   }
13229
13230   // TODO: Deal with nonzero offset.
13231   if (LD->getBasePtr().isUndef() || Offset != 0)
13232     return SDValue();
13233   // Model necessary truncations / extenstions.
13234   SDValue Val;
13235   // Truncate Value To Stored Memory Size.
13236   do {
13237     if (!getTruncatedStoreValue(ST, Val))
13238       continue;
13239     if (!isTypeLegal(LDMemType))
13240       continue;
13241     if (STMemType != LDMemType) {
13242       // TODO: Support vectors? This requires extract_subvector/bitcast.
13243       if (!STMemType.isVector() && !LDMemType.isVector() &&
13244           STMemType.isInteger() && LDMemType.isInteger())
13245         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13246       else
13247         continue;
13248     }
13249     if (!extendLoadedValueToExtension(LD, Val))
13250       continue;
13251     return ReplaceLd(LD, Val, Chain);
13252   } while (false);
13253
13254   // On failure, cleanup dead nodes we may have created.
13255   if (Val->use_empty())
13256     deleteAndRecombine(Val.getNode());
13257   return SDValue();
13258 }
13259
13260 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13261   LoadSDNode *LD  = cast<LoadSDNode>(N);
13262   SDValue Chain = LD->getChain();
13263   SDValue Ptr   = LD->getBasePtr();
13264
13265   // If load is not volatile and there are no uses of the loaded value (and
13266   // the updated indexed value in case of indexed loads), change uses of the
13267   // chain value into uses of the chain input (i.e. delete the dead load).
13268   if (!LD->isVolatile()) {
13269     if (N->getValueType(1) == MVT::Other) {
13270       // Unindexed loads.
13271       if (!N->hasAnyUseOfValue(0)) {
13272         // It's not safe to use the two value CombineTo variant here. e.g.
13273         // v1, chain2 = load chain1, loc
13274         // v2, chain3 = load chain2, loc
13275         // v3         = add v2, c
13276         // Now we replace use of chain2 with chain1.  This makes the second load
13277         // isomorphic to the one we are deleting, and thus makes this load live.
13278         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13279                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13280                    dbgs() << "\n");
13281         WorklistRemover DeadNodes(*this);
13282         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13283         AddUsersToWorklist(Chain.getNode());
13284         if (N->use_empty())
13285           deleteAndRecombine(N);
13286
13287         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13288       }
13289     } else {
13290       // Indexed loads.
13291       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13292
13293       // If this load has an opaque TargetConstant offset, then we cannot split
13294       // the indexing into an add/sub directly (that TargetConstant may not be
13295       // valid for a different type of node, and we cannot convert an opaque
13296       // target constant into a regular constant).
13297       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13298                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13299
13300       if (!N->hasAnyUseOfValue(0) &&
13301           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13302         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13303         SDValue Index;
13304         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13305           Index = SplitIndexingFromLoad(LD);
13306           // Try to fold the base pointer arithmetic into subsequent loads and
13307           // stores.
13308           AddUsersToWorklist(N);
13309         } else
13310           Index = DAG.getUNDEF(N->getValueType(1));
13311         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13312                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13313                    dbgs() << " and 2 other values\n");
13314         WorklistRemover DeadNodes(*this);
13315         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13316         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13317         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13318         deleteAndRecombine(N);
13319         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13320       }
13321     }
13322   }
13323
13324   // If this load is directly stored, replace the load value with the stored
13325   // value.
13326   if (auto V = ForwardStoreValueToDirectLoad(LD))
13327     return V;
13328
13329   // Try to infer better alignment information than the load already has.
13330   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13331     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13332       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13333         SDValue NewLoad = DAG.getExtLoad(
13334             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13335             LD->getPointerInfo(), LD->getMemoryVT(), Align,
13336             LD->getMemOperand()->getFlags(), LD->getAAInfo());
13337         // NewLoad will always be N as we are only refining the alignment
13338         assert(NewLoad.getNode() == N);
13339         (void)NewLoad;
13340       }
13341     }
13342   }
13343
13344   if (LD->isUnindexed()) {
13345     // Walk up chain skipping non-aliasing memory nodes.
13346     SDValue BetterChain = FindBetterChain(LD, Chain);
13347
13348     // If there is a better chain.
13349     if (Chain != BetterChain) {
13350       SDValue ReplLoad;
13351
13352       // Replace the chain to void dependency.
13353       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13354         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13355                                BetterChain, Ptr, LD->getMemOperand());
13356       } else {
13357         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13358                                   LD->getValueType(0),
13359                                   BetterChain, Ptr, LD->getMemoryVT(),
13360                                   LD->getMemOperand());
13361       }
13362
13363       // Create token factor to keep old chain connected.
13364       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13365                                   MVT::Other, Chain, ReplLoad.getValue(1));
13366
13367       // Replace uses with load result and token factor
13368       return CombineTo(N, ReplLoad.getValue(0), Token);
13369     }
13370   }
13371
13372   // Try transforming N to an indexed load.
13373   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13374     return SDValue(N, 0);
13375
13376   // Try to slice up N to more direct loads if the slices are mapped to
13377   // different register banks or pairing can take place.
13378   if (SliceUpLoad(N))
13379     return SDValue(N, 0);
13380
13381   return SDValue();
13382 }
13383
13384 namespace {
13385
13386 /// Helper structure used to slice a load in smaller loads.
13387 /// Basically a slice is obtained from the following sequence:
13388 /// Origin = load Ty1, Base
13389 /// Shift = srl Ty1 Origin, CstTy Amount
13390 /// Inst = trunc Shift to Ty2
13391 ///
13392 /// Then, it will be rewritten into:
13393 /// Slice = load SliceTy, Base + SliceOffset
13394 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13395 ///
13396 /// SliceTy is deduced from the number of bits that are actually used to
13397 /// build Inst.
13398 struct LoadedSlice {
13399   /// Helper structure used to compute the cost of a slice.
13400   struct Cost {
13401     /// Are we optimizing for code size.
13402     bool ForCodeSize;
13403
13404     /// Various cost.
13405     unsigned Loads = 0;
13406     unsigned Truncates = 0;
13407     unsigned CrossRegisterBanksCopies = 0;
13408     unsigned ZExts = 0;
13409     unsigned Shift = 0;
13410
13411     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13412
13413     /// Get the cost of one isolated slice.
13414     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13415         : ForCodeSize(ForCodeSize), Loads(1) {
13416       EVT TruncType = LS.Inst->getValueType(0);
13417       EVT LoadedType = LS.getLoadedType();
13418       if (TruncType != LoadedType &&
13419           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13420         ZExts = 1;
13421     }
13422
13423     /// Account for slicing gain in the current cost.
13424     /// Slicing provide a few gains like removing a shift or a
13425     /// truncate. This method allows to grow the cost of the original
13426     /// load with the gain from this slice.
13427     void addSliceGain(const LoadedSlice &LS) {
13428       // Each slice saves a truncate.
13429       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13430       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13431                               LS.Inst->getValueType(0)))
13432         ++Truncates;
13433       // If there is a shift amount, this slice gets rid of it.
13434       if (LS.Shift)
13435         ++Shift;
13436       // If this slice can merge a cross register bank copy, account for it.
13437       if (LS.canMergeExpensiveCrossRegisterBankCopy())
13438         ++CrossRegisterBanksCopies;
13439     }
13440
13441     Cost &operator+=(const Cost &RHS) {
13442       Loads += RHS.Loads;
13443       Truncates += RHS.Truncates;
13444       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13445       ZExts += RHS.ZExts;
13446       Shift += RHS.Shift;
13447       return *this;
13448     }
13449
13450     bool operator==(const Cost &RHS) const {
13451       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13452              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13453              ZExts == RHS.ZExts && Shift == RHS.Shift;
13454     }
13455
13456     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13457
13458     bool operator<(const Cost &RHS) const {
13459       // Assume cross register banks copies are as expensive as loads.
13460       // FIXME: Do we want some more target hooks?
13461       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13462       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13463       // Unless we are optimizing for code size, consider the
13464       // expensive operation first.
13465       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13466         return ExpensiveOpsLHS < ExpensiveOpsRHS;
13467       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13468              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13469     }
13470
13471     bool operator>(const Cost &RHS) const { return RHS < *this; }
13472
13473     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13474
13475     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13476   };
13477
13478   // The last instruction that represent the slice. This should be a
13479   // truncate instruction.
13480   SDNode *Inst;
13481
13482   // The original load instruction.
13483   LoadSDNode *Origin;
13484
13485   // The right shift amount in bits from the original load.
13486   unsigned Shift;
13487
13488   // The DAG from which Origin came from.
13489   // This is used to get some contextual information about legal types, etc.
13490   SelectionDAG *DAG;
13491
13492   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13493               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13494       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13495
13496   /// Get the bits used in a chunk of bits \p BitWidth large.
13497   /// \return Result is \p BitWidth and has used bits set to 1 and
13498   ///         not used bits set to 0.
13499   APInt getUsedBits() const {
13500     // Reproduce the trunc(lshr) sequence:
13501     // - Start from the truncated value.
13502     // - Zero extend to the desired bit width.
13503     // - Shift left.
13504     assert(Origin && "No original load to compare against.");
13505     unsigned BitWidth = Origin->getValueSizeInBits(0);
13506     assert(Inst && "This slice is not bound to an instruction");
13507     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13508            "Extracted slice is bigger than the whole type!");
13509     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13510     UsedBits.setAllBits();
13511     UsedBits = UsedBits.zext(BitWidth);
13512     UsedBits <<= Shift;
13513     return UsedBits;
13514   }
13515
13516   /// Get the size of the slice to be loaded in bytes.
13517   unsigned getLoadedSize() const {
13518     unsigned SliceSize = getUsedBits().countPopulation();
13519     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13520     return SliceSize / 8;
13521   }
13522
13523   /// Get the type that will be loaded for this slice.
13524   /// Note: This may not be the final type for the slice.
13525   EVT getLoadedType() const {
13526     assert(DAG && "Missing context");
13527     LLVMContext &Ctxt = *DAG->getContext();
13528     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13529   }
13530
13531   /// Get the alignment of the load used for this slice.
13532   unsigned getAlignment() const {
13533     unsigned Alignment = Origin->getAlignment();
13534     unsigned Offset = getOffsetFromBase();
13535     if (Offset != 0)
13536       Alignment = MinAlign(Alignment, Alignment + Offset);
13537     return Alignment;
13538   }
13539
13540   /// Check if this slice can be rewritten with legal operations.
13541   bool isLegal() const {
13542     // An invalid slice is not legal.
13543     if (!Origin || !Inst || !DAG)
13544       return false;
13545
13546     // Offsets are for indexed load only, we do not handle that.
13547     if (!Origin->getOffset().isUndef())
13548       return false;
13549
13550     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13551
13552     // Check that the type is legal.
13553     EVT SliceType = getLoadedType();
13554     if (!TLI.isTypeLegal(SliceType))
13555       return false;
13556
13557     // Check that the load is legal for this type.
13558     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13559       return false;
13560
13561     // Check that the offset can be computed.
13562     // 1. Check its type.
13563     EVT PtrType = Origin->getBasePtr().getValueType();
13564     if (PtrType == MVT::Untyped || PtrType.isExtended())
13565       return false;
13566
13567     // 2. Check that it fits in the immediate.
13568     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13569       return false;
13570
13571     // 3. Check that the computation is legal.
13572     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13573       return false;
13574
13575     // Check that the zext is legal if it needs one.
13576     EVT TruncateType = Inst->getValueType(0);
13577     if (TruncateType != SliceType &&
13578         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13579       return false;
13580
13581     return true;
13582   }
13583
13584   /// Get the offset in bytes of this slice in the original chunk of
13585   /// bits.
13586   /// \pre DAG != nullptr.
13587   uint64_t getOffsetFromBase() const {
13588     assert(DAG && "Missing context.");
13589     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13590     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13591     uint64_t Offset = Shift / 8;
13592     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13593     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13594            "The size of the original loaded type is not a multiple of a"
13595            " byte.");
13596     // If Offset is bigger than TySizeInBytes, it means we are loading all
13597     // zeros. This should have been optimized before in the process.
13598     assert(TySizeInBytes > Offset &&
13599            "Invalid shift amount for given loaded size");
13600     if (IsBigEndian)
13601       Offset = TySizeInBytes - Offset - getLoadedSize();
13602     return Offset;
13603   }
13604
13605   /// Generate the sequence of instructions to load the slice
13606   /// represented by this object and redirect the uses of this slice to
13607   /// this new sequence of instructions.
13608   /// \pre this->Inst && this->Origin are valid Instructions and this
13609   /// object passed the legal check: LoadedSlice::isLegal returned true.
13610   /// \return The last instruction of the sequence used to load the slice.
13611   SDValue loadSlice() const {
13612     assert(Inst && Origin && "Unable to replace a non-existing slice.");
13613     const SDValue &OldBaseAddr = Origin->getBasePtr();
13614     SDValue BaseAddr = OldBaseAddr;
13615     // Get the offset in that chunk of bytes w.r.t. the endianness.
13616     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13617     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13618     if (Offset) {
13619       // BaseAddr = BaseAddr + Offset.
13620       EVT ArithType = BaseAddr.getValueType();
13621       SDLoc DL(Origin);
13622       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13623                               DAG->getConstant(Offset, DL, ArithType));
13624     }
13625
13626     // Create the type of the loaded slice according to its size.
13627     EVT SliceType = getLoadedType();
13628
13629     // Create the load for the slice.
13630     SDValue LastInst =
13631         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13632                      Origin->getPointerInfo().getWithOffset(Offset),
13633                      getAlignment(), Origin->getMemOperand()->getFlags());
13634     // If the final type is not the same as the loaded type, this means that
13635     // we have to pad with zero. Create a zero extend for that.
13636     EVT FinalType = Inst->getValueType(0);
13637     if (SliceType != FinalType)
13638       LastInst =
13639           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13640     return LastInst;
13641   }
13642
13643   /// Check if this slice can be merged with an expensive cross register
13644   /// bank copy. E.g.,
13645   /// i = load i32
13646   /// f = bitcast i32 i to float
13647   bool canMergeExpensiveCrossRegisterBankCopy() const {
13648     if (!Inst || !Inst->hasOneUse())
13649       return false;
13650     SDNode *Use = *Inst->use_begin();
13651     if (Use->getOpcode() != ISD::BITCAST)
13652       return false;
13653     assert(DAG && "Missing context");
13654     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13655     EVT ResVT = Use->getValueType(0);
13656     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13657     const TargetRegisterClass *ArgRC =
13658         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13659     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13660       return false;
13661
13662     // At this point, we know that we perform a cross-register-bank copy.
13663     // Check if it is expensive.
13664     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13665     // Assume bitcasts are cheap, unless both register classes do not
13666     // explicitly share a common sub class.
13667     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13668       return false;
13669
13670     // Check if it will be merged with the load.
13671     // 1. Check the alignment constraint.
13672     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13673         ResVT.getTypeForEVT(*DAG->getContext()));
13674
13675     if (RequiredAlignment > getAlignment())
13676       return false;
13677
13678     // 2. Check that the load is a legal operation for that type.
13679     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13680       return false;
13681
13682     // 3. Check that we do not have a zext in the way.
13683     if (Inst->getValueType(0) != getLoadedType())
13684       return false;
13685
13686     return true;
13687   }
13688 };
13689
13690 } // end anonymous namespace
13691
13692 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13693 /// \p UsedBits looks like 0..0 1..1 0..0.
13694 static bool areUsedBitsDense(const APInt &UsedBits) {
13695   // If all the bits are one, this is dense!
13696   if (UsedBits.isAllOnesValue())
13697     return true;
13698
13699   // Get rid of the unused bits on the right.
13700   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13701   // Get rid of the unused bits on the left.
13702   if (NarrowedUsedBits.countLeadingZeros())
13703     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13704   // Check that the chunk of bits is completely used.
13705   return NarrowedUsedBits.isAllOnesValue();
13706 }
13707
13708 /// Check whether or not \p First and \p Second are next to each other
13709 /// in memory. This means that there is no hole between the bits loaded
13710 /// by \p First and the bits loaded by \p Second.
13711 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13712                                      const LoadedSlice &Second) {
13713   assert(First.Origin == Second.Origin && First.Origin &&
13714          "Unable to match different memory origins.");
13715   APInt UsedBits = First.getUsedBits();
13716   assert((UsedBits & Second.getUsedBits()) == 0 &&
13717          "Slices are not supposed to overlap.");
13718   UsedBits |= Second.getUsedBits();
13719   return areUsedBitsDense(UsedBits);
13720 }
13721
13722 /// Adjust the \p GlobalLSCost according to the target
13723 /// paring capabilities and the layout of the slices.
13724 /// \pre \p GlobalLSCost should account for at least as many loads as
13725 /// there is in the slices in \p LoadedSlices.
13726 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13727                                  LoadedSlice::Cost &GlobalLSCost) {
13728   unsigned NumberOfSlices = LoadedSlices.size();
13729   // If there is less than 2 elements, no pairing is possible.
13730   if (NumberOfSlices < 2)
13731     return;
13732
13733   // Sort the slices so that elements that are likely to be next to each
13734   // other in memory are next to each other in the list.
13735   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13736     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13737     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13738   });
13739   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13740   // First (resp. Second) is the first (resp. Second) potentially candidate
13741   // to be placed in a paired load.
13742   const LoadedSlice *First = nullptr;
13743   const LoadedSlice *Second = nullptr;
13744   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13745                 // Set the beginning of the pair.
13746                                                            First = Second) {
13747     Second = &LoadedSlices[CurrSlice];
13748
13749     // If First is NULL, it means we start a new pair.
13750     // Get to the next slice.
13751     if (!First)
13752       continue;
13753
13754     EVT LoadedType = First->getLoadedType();
13755
13756     // If the types of the slices are different, we cannot pair them.
13757     if (LoadedType != Second->getLoadedType())
13758       continue;
13759
13760     // Check if the target supplies paired loads for this type.
13761     unsigned RequiredAlignment = 0;
13762     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13763       // move to the next pair, this type is hopeless.
13764       Second = nullptr;
13765       continue;
13766     }
13767     // Check if we meet the alignment requirement.
13768     if (RequiredAlignment > First->getAlignment())
13769       continue;
13770
13771     // Check that both loads are next to each other in memory.
13772     if (!areSlicesNextToEachOther(*First, *Second))
13773       continue;
13774
13775     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13776     --GlobalLSCost.Loads;
13777     // Move to the next pair.
13778     Second = nullptr;
13779   }
13780 }
13781
13782 /// Check the profitability of all involved LoadedSlice.
13783 /// Currently, it is considered profitable if there is exactly two
13784 /// involved slices (1) which are (2) next to each other in memory, and
13785 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13786 ///
13787 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13788 /// the elements themselves.
13789 ///
13790 /// FIXME: When the cost model will be mature enough, we can relax
13791 /// constraints (1) and (2).
13792 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13793                                 const APInt &UsedBits, bool ForCodeSize) {
13794   unsigned NumberOfSlices = LoadedSlices.size();
13795   if (StressLoadSlicing)
13796     return NumberOfSlices > 1;
13797
13798   // Check (1).
13799   if (NumberOfSlices != 2)
13800     return false;
13801
13802   // Check (2).
13803   if (!areUsedBitsDense(UsedBits))
13804     return false;
13805
13806   // Check (3).
13807   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13808   // The original code has one big load.
13809   OrigCost.Loads = 1;
13810   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13811     const LoadedSlice &LS = LoadedSlices[CurrSlice];
13812     // Accumulate the cost of all the slices.
13813     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13814     GlobalSlicingCost += SliceCost;
13815
13816     // Account as cost in the original configuration the gain obtained
13817     // with the current slices.
13818     OrigCost.addSliceGain(LS);
13819   }
13820
13821   // If the target supports paired load, adjust the cost accordingly.
13822   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13823   return OrigCost > GlobalSlicingCost;
13824 }
13825
13826 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13827 /// operations, split it in the various pieces being extracted.
13828 ///
13829 /// This sort of thing is introduced by SROA.
13830 /// This slicing takes care not to insert overlapping loads.
13831 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13832 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13833   if (Level < AfterLegalizeDAG)
13834     return false;
13835
13836   LoadSDNode *LD = cast<LoadSDNode>(N);
13837   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13838       !LD->getValueType(0).isInteger())
13839     return false;
13840
13841   // Keep track of already used bits to detect overlapping values.
13842   // In that case, we will just abort the transformation.
13843   APInt UsedBits(LD->getValueSizeInBits(0), 0);
13844
13845   SmallVector<LoadedSlice, 4> LoadedSlices;
13846
13847   // Check if this load is used as several smaller chunks of bits.
13848   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13849   // of computation for each trunc.
13850   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13851        UI != UIEnd; ++UI) {
13852     // Skip the uses of the chain.
13853     if (UI.getUse().getResNo() != 0)
13854       continue;
13855
13856     SDNode *User = *UI;
13857     unsigned Shift = 0;
13858
13859     // Check if this is a trunc(lshr).
13860     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13861         isa<ConstantSDNode>(User->getOperand(1))) {
13862       Shift = User->getConstantOperandVal(1);
13863       User = *User->use_begin();
13864     }
13865
13866     // At this point, User is a Truncate, iff we encountered, trunc or
13867     // trunc(lshr).
13868     if (User->getOpcode() != ISD::TRUNCATE)
13869       return false;
13870
13871     // The width of the type must be a power of 2 and greater than 8-bits.
13872     // Otherwise the load cannot be represented in LLVM IR.
13873     // Moreover, if we shifted with a non-8-bits multiple, the slice
13874     // will be across several bytes. We do not support that.
13875     unsigned Width = User->getValueSizeInBits(0);
13876     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13877       return false;
13878
13879     // Build the slice for this chain of computations.
13880     LoadedSlice LS(User, LD, Shift, &DAG);
13881     APInt CurrentUsedBits = LS.getUsedBits();
13882
13883     // Check if this slice overlaps with another.
13884     if ((CurrentUsedBits & UsedBits) != 0)
13885       return false;
13886     // Update the bits used globally.
13887     UsedBits |= CurrentUsedBits;
13888
13889     // Check if the new slice would be legal.
13890     if (!LS.isLegal())
13891       return false;
13892
13893     // Record the slice.
13894     LoadedSlices.push_back(LS);
13895   }
13896
13897   // Abort slicing if it does not seem to be profitable.
13898   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13899     return false;
13900
13901   ++SlicedLoads;
13902
13903   // Rewrite each chain to use an independent load.
13904   // By construction, each chain can be represented by a unique load.
13905
13906   // Prepare the argument for the new token factor for all the slices.
13907   SmallVector<SDValue, 8> ArgChains;
13908   for (SmallVectorImpl<LoadedSlice>::const_iterator
13909            LSIt = LoadedSlices.begin(),
13910            LSItEnd = LoadedSlices.end();
13911        LSIt != LSItEnd; ++LSIt) {
13912     SDValue SliceInst = LSIt->loadSlice();
13913     CombineTo(LSIt->Inst, SliceInst, true);
13914     if (SliceInst.getOpcode() != ISD::LOAD)
13915       SliceInst = SliceInst.getOperand(0);
13916     assert(SliceInst->getOpcode() == ISD::LOAD &&
13917            "It takes more than a zext to get to the loaded slice!!");
13918     ArgChains.push_back(SliceInst.getValue(1));
13919   }
13920
13921   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13922                               ArgChains);
13923   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13924   AddToWorklist(Chain.getNode());
13925   return true;
13926 }
13927
13928 /// Check to see if V is (and load (ptr), imm), where the load is having
13929 /// specific bytes cleared out.  If so, return the byte size being masked out
13930 /// and the shift amount.
13931 static std::pair<unsigned, unsigned>
13932 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13933   std::pair<unsigned, unsigned> Result(0, 0);
13934
13935   // Check for the structure we're looking for.
13936   if (V->getOpcode() != ISD::AND ||
13937       !isa<ConstantSDNode>(V->getOperand(1)) ||
13938       !ISD::isNormalLoad(V->getOperand(0).getNode()))
13939     return Result;
13940
13941   // Check the chain and pointer.
13942   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13943   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
13944
13945   // This only handles simple types.
13946   if (V.getValueType() != MVT::i16 &&
13947       V.getValueType() != MVT::i32 &&
13948       V.getValueType() != MVT::i64)
13949     return Result;
13950
13951   // Check the constant mask.  Invert it so that the bits being masked out are
13952   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
13953   // follow the sign bit for uniformity.
13954   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13955   unsigned NotMaskLZ = countLeadingZeros(NotMask);
13956   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
13957   unsigned NotMaskTZ = countTrailingZeros(NotMask);
13958   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
13959   if (NotMaskLZ == 64) return Result;  // All zero mask.
13960
13961   // See if we have a continuous run of bits.  If so, we have 0*1+0*
13962   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13963     return Result;
13964
13965   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13966   if (V.getValueType() != MVT::i64 && NotMaskLZ)
13967     NotMaskLZ -= 64-V.getValueSizeInBits();
13968
13969   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13970   switch (MaskedBytes) {
13971   case 1:
13972   case 2:
13973   case 4: break;
13974   default: return Result; // All one mask, or 5-byte mask.
13975   }
13976
13977   // Verify that the first bit starts at a multiple of mask so that the access
13978   // is aligned the same as the access width.
13979   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13980
13981   // For narrowing to be valid, it must be the case that the load the
13982   // immediately preceeding memory operation before the store.
13983   if (LD == Chain.getNode())
13984     ; // ok.
13985   else if (Chain->getOpcode() == ISD::TokenFactor &&
13986            SDValue(LD, 1).hasOneUse()) {
13987     // LD has only 1 chain use so they are no indirect dependencies.
13988     bool isOk = false;
13989     for (const SDValue &ChainOp : Chain->op_values())
13990       if (ChainOp.getNode() == LD) {
13991         isOk = true;
13992         break;
13993       }
13994     if (!isOk)
13995       return Result;
13996   } else
13997     return Result; // Fail.
13998
13999   Result.first = MaskedBytes;
14000   Result.second = NotMaskTZ/8;
14001   return Result;
14002 }
14003
14004 /// Check to see if IVal is something that provides a value as specified by
14005 /// MaskInfo. If so, replace the specified store with a narrower store of
14006 /// truncated IVal.
14007 static SDNode *
14008 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14009                                 SDValue IVal, StoreSDNode *St,
14010                                 DAGCombiner *DC) {
14011   unsigned NumBytes = MaskInfo.first;
14012   unsigned ByteShift = MaskInfo.second;
14013   SelectionDAG &DAG = DC->getDAG();
14014
14015   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14016   // that uses this.  If not, this is not a replacement.
14017   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14018                                   ByteShift*8, (ByteShift+NumBytes)*8);
14019   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14020
14021   // Check that it is legal on the target to do this.  It is legal if the new
14022   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14023   // legalization.
14024   MVT VT = MVT::getIntegerVT(NumBytes*8);
14025   if (!DC->isTypeLegal(VT))
14026     return nullptr;
14027
14028   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14029   // shifted by ByteShift and truncated down to NumBytes.
14030   if (ByteShift) {
14031     SDLoc DL(IVal);
14032     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14033                        DAG.getConstant(ByteShift*8, DL,
14034                                     DC->getShiftAmountTy(IVal.getValueType())));
14035   }
14036
14037   // Figure out the offset for the store and the alignment of the access.
14038   unsigned StOffset;
14039   unsigned NewAlign = St->getAlignment();
14040
14041   if (DAG.getDataLayout().isLittleEndian())
14042     StOffset = ByteShift;
14043   else
14044     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14045
14046   SDValue Ptr = St->getBasePtr();
14047   if (StOffset) {
14048     SDLoc DL(IVal);
14049     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14050                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14051     NewAlign = MinAlign(NewAlign, StOffset);
14052   }
14053
14054   // Truncate down to the new size.
14055   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14056
14057   ++OpsNarrowed;
14058   return DAG
14059       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14060                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14061       .getNode();
14062 }
14063
14064 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14065 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14066 /// narrowing the load and store if it would end up being a win for performance
14067 /// or code size.
14068 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14069   StoreSDNode *ST  = cast<StoreSDNode>(N);
14070   if (ST->isVolatile())
14071     return SDValue();
14072
14073   SDValue Chain = ST->getChain();
14074   SDValue Value = ST->getValue();
14075   SDValue Ptr   = ST->getBasePtr();
14076   EVT VT = Value.getValueType();
14077
14078   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14079     return SDValue();
14080
14081   unsigned Opc = Value.getOpcode();
14082
14083   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14084   // is a byte mask indicating a consecutive number of bytes, check to see if
14085   // Y is known to provide just those bytes.  If so, we try to replace the
14086   // load + replace + store sequence with a single (narrower) store, which makes
14087   // the load dead.
14088   if (Opc == ISD::OR) {
14089     std::pair<unsigned, unsigned> MaskedLoad;
14090     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14091     if (MaskedLoad.first)
14092       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14093                                                   Value.getOperand(1), ST,this))
14094         return SDValue(NewST, 0);
14095
14096     // Or is commutative, so try swapping X and Y.
14097     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14098     if (MaskedLoad.first)
14099       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14100                                                   Value.getOperand(0), ST,this))
14101         return SDValue(NewST, 0);
14102   }
14103
14104   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14105       Value.getOperand(1).getOpcode() != ISD::Constant)
14106     return SDValue();
14107
14108   SDValue N0 = Value.getOperand(0);
14109   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14110       Chain == SDValue(N0.getNode(), 1)) {
14111     LoadSDNode *LD = cast<LoadSDNode>(N0);
14112     if (LD->getBasePtr() != Ptr ||
14113         LD->getPointerInfo().getAddrSpace() !=
14114         ST->getPointerInfo().getAddrSpace())
14115       return SDValue();
14116
14117     // Find the type to narrow it the load / op / store to.
14118     SDValue N1 = Value.getOperand(1);
14119     unsigned BitWidth = N1.getValueSizeInBits();
14120     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14121     if (Opc == ISD::AND)
14122       Imm ^= APInt::getAllOnesValue(BitWidth);
14123     if (Imm == 0 || Imm.isAllOnesValue())
14124       return SDValue();
14125     unsigned ShAmt = Imm.countTrailingZeros();
14126     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14127     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14128     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14129     // The narrowing should be profitable, the load/store operation should be
14130     // legal (or custom) and the store size should be equal to the NewVT width.
14131     while (NewBW < BitWidth &&
14132            (NewVT.getStoreSizeInBits() != NewBW ||
14133             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14134             !TLI.isNarrowingProfitable(VT, NewVT))) {
14135       NewBW = NextPowerOf2(NewBW);
14136       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14137     }
14138     if (NewBW >= BitWidth)
14139       return SDValue();
14140
14141     // If the lsb changed does not start at the type bitwidth boundary,
14142     // start at the previous one.
14143     if (ShAmt % NewBW)
14144       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14145     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14146                                    std::min(BitWidth, ShAmt + NewBW));
14147     if ((Imm & Mask) == Imm) {
14148       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14149       if (Opc == ISD::AND)
14150         NewImm ^= APInt::getAllOnesValue(NewBW);
14151       uint64_t PtrOff = ShAmt / 8;
14152       // For big endian targets, we need to adjust the offset to the pointer to
14153       // load the correct bytes.
14154       if (DAG.getDataLayout().isBigEndian())
14155         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14156
14157       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14158       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14159       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14160         return SDValue();
14161
14162       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14163                                    Ptr.getValueType(), Ptr,
14164                                    DAG.getConstant(PtrOff, SDLoc(LD),
14165                                                    Ptr.getValueType()));
14166       SDValue NewLD =
14167           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14168                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14169                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14170       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14171                                    DAG.getConstant(NewImm, SDLoc(Value),
14172                                                    NewVT));
14173       SDValue NewST =
14174           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14175                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14176
14177       AddToWorklist(NewPtr.getNode());
14178       AddToWorklist(NewLD.getNode());
14179       AddToWorklist(NewVal.getNode());
14180       WorklistRemover DeadNodes(*this);
14181       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14182       ++OpsNarrowed;
14183       return NewST;
14184     }
14185   }
14186
14187   return SDValue();
14188 }
14189
14190 /// For a given floating point load / store pair, if the load value isn't used
14191 /// by any other operations, then consider transforming the pair to integer
14192 /// load / store operations if the target deems the transformation profitable.
14193 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14194   StoreSDNode *ST  = cast<StoreSDNode>(N);
14195   SDValue Chain = ST->getChain();
14196   SDValue Value = ST->getValue();
14197   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14198       Value.hasOneUse() &&
14199       Chain == SDValue(Value.getNode(), 1)) {
14200     LoadSDNode *LD = cast<LoadSDNode>(Value);
14201     EVT VT = LD->getMemoryVT();
14202     if (!VT.isFloatingPoint() ||
14203         VT != ST->getMemoryVT() ||
14204         LD->isNonTemporal() ||
14205         ST->isNonTemporal() ||
14206         LD->getPointerInfo().getAddrSpace() != 0 ||
14207         ST->getPointerInfo().getAddrSpace() != 0)
14208       return SDValue();
14209
14210     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14211     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14212         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14213         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14214         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14215       return SDValue();
14216
14217     unsigned LDAlign = LD->getAlignment();
14218     unsigned STAlign = ST->getAlignment();
14219     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14220     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14221     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14222       return SDValue();
14223
14224     SDValue NewLD =
14225         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14226                     LD->getPointerInfo(), LDAlign);
14227
14228     SDValue NewST =
14229         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14230                      ST->getPointerInfo(), STAlign);
14231
14232     AddToWorklist(NewLD.getNode());
14233     AddToWorklist(NewST.getNode());
14234     WorklistRemover DeadNodes(*this);
14235     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14236     ++LdStFP2Int;
14237     return NewST;
14238   }
14239
14240   return SDValue();
14241 }
14242
14243 // This is a helper function for visitMUL to check the profitability
14244 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14245 // MulNode is the original multiply, AddNode is (add x, c1),
14246 // and ConstNode is c2.
14247 //
14248 // If the (add x, c1) has multiple uses, we could increase
14249 // the number of adds if we make this transformation.
14250 // It would only be worth doing this if we can remove a
14251 // multiply in the process. Check for that here.
14252 // To illustrate:
14253 //     (A + c1) * c3
14254 //     (A + c2) * c3
14255 // We're checking for cases where we have common "c3 * A" expressions.
14256 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14257                                               SDValue &AddNode,
14258                                               SDValue &ConstNode) {
14259   APInt Val;
14260
14261   // If the add only has one use, this would be OK to do.
14262   if (AddNode.getNode()->hasOneUse())
14263     return true;
14264
14265   // Walk all the users of the constant with which we're multiplying.
14266   for (SDNode *Use : ConstNode->uses()) {
14267     if (Use == MulNode) // This use is the one we're on right now. Skip it.
14268       continue;
14269
14270     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14271       SDNode *OtherOp;
14272       SDNode *MulVar = AddNode.getOperand(0).getNode();
14273
14274       // OtherOp is what we're multiplying against the constant.
14275       if (Use->getOperand(0) == ConstNode)
14276         OtherOp = Use->getOperand(1).getNode();
14277       else
14278         OtherOp = Use->getOperand(0).getNode();
14279
14280       // Check to see if multiply is with the same operand of our "add".
14281       //
14282       //     ConstNode  = CONST
14283       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
14284       //     ...
14285       //     AddNode  = (A + c1)  <-- MulVar is A.
14286       //         = AddNode * ConstNode   <-- current visiting instruction.
14287       //
14288       // If we make this transformation, we will have a common
14289       // multiply (ConstNode * A) that we can save.
14290       if (OtherOp == MulVar)
14291         return true;
14292
14293       // Now check to see if a future expansion will give us a common
14294       // multiply.
14295       //
14296       //     ConstNode  = CONST
14297       //     AddNode    = (A + c1)
14298       //     ...   = AddNode * ConstNode <-- current visiting instruction.
14299       //     ...
14300       //     OtherOp = (A + c2)
14301       //     Use     = OtherOp * ConstNode <-- visiting Use.
14302       //
14303       // If we make this transformation, we will have a common
14304       // multiply (CONST * A) after we also do the same transformation
14305       // to the "t2" instruction.
14306       if (OtherOp->getOpcode() == ISD::ADD &&
14307           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14308           OtherOp->getOperand(0).getNode() == MulVar)
14309         return true;
14310     }
14311   }
14312
14313   // Didn't find a case where this would be profitable.
14314   return false;
14315 }
14316
14317 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14318                                          unsigned NumStores) {
14319   SmallVector<SDValue, 8> Chains;
14320   SmallPtrSet<const SDNode *, 8> Visited;
14321   SDLoc StoreDL(StoreNodes[0].MemNode);
14322
14323   for (unsigned i = 0; i < NumStores; ++i) {
14324     Visited.insert(StoreNodes[i].MemNode);
14325   }
14326
14327   // don't include nodes that are children
14328   for (unsigned i = 0; i < NumStores; ++i) {
14329     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
14330       Chains.push_back(StoreNodes[i].MemNode->getChain());
14331   }
14332
14333   assert(Chains.size() > 0 && "Chain should have generated a chain");
14334   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
14335 }
14336
14337 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14338     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14339     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14340   // Make sure we have something to merge.
14341   if (NumStores < 2)
14342     return false;
14343
14344   // The latest Node in the DAG.
14345   SDLoc DL(StoreNodes[0].MemNode);
14346
14347   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14348   unsigned SizeInBits = NumStores * ElementSizeBits;
14349   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14350
14351   EVT StoreTy;
14352   if (UseVector) {
14353     unsigned Elts = NumStores * NumMemElts;
14354     // Get the type for the merged vector store.
14355     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14356   } else
14357     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14358
14359   SDValue StoredVal;
14360   if (UseVector) {
14361     if (IsConstantSrc) {
14362       SmallVector<SDValue, 8> BuildVector;
14363       for (unsigned I = 0; I != NumStores; ++I) {
14364         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14365         SDValue Val = St->getValue();
14366         // If constant is of the wrong type, convert it now.
14367         if (MemVT != Val.getValueType()) {
14368           Val = peekThroughBitcasts(Val);
14369           // Deal with constants of wrong size.
14370           if (ElementSizeBits != Val.getValueSizeInBits()) {
14371             EVT IntMemVT =
14372                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14373             if (isa<ConstantFPSDNode>(Val)) {
14374               // Not clear how to truncate FP values.
14375               return false;
14376             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14377               Val = DAG.getConstant(C->getAPIntValue()
14378                                         .zextOrTrunc(Val.getValueSizeInBits())
14379                                         .zextOrTrunc(ElementSizeBits),
14380                                     SDLoc(C), IntMemVT);
14381           }
14382           // Make sure correctly size type is the correct type.
14383           Val = DAG.getBitcast(MemVT, Val);
14384         }
14385         BuildVector.push_back(Val);
14386       }
14387       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14388                                                : ISD::BUILD_VECTOR,
14389                               DL, StoreTy, BuildVector);
14390     } else {
14391       SmallVector<SDValue, 8> Ops;
14392       for (unsigned i = 0; i < NumStores; ++i) {
14393         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14394         SDValue Val = peekThroughBitcasts(St->getValue());
14395         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14396         // type MemVT. If the underlying value is not the correct
14397         // type, but it is an extraction of an appropriate vector we
14398         // can recast Val to be of the correct type. This may require
14399         // converting between EXTRACT_VECTOR_ELT and
14400         // EXTRACT_SUBVECTOR.
14401         if ((MemVT != Val.getValueType()) &&
14402             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14403              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14404           EVT MemVTScalarTy = MemVT.getScalarType();
14405           // We may need to add a bitcast here to get types to line up.
14406           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14407             Val = DAG.getBitcast(MemVT, Val);
14408           } else {
14409             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14410                                             : ISD::EXTRACT_VECTOR_ELT;
14411             SDValue Vec = Val.getOperand(0);
14412             SDValue Idx = Val.getOperand(1);
14413             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14414           }
14415         }
14416         Ops.push_back(Val);
14417       }
14418
14419       // Build the extracted vector elements back into a vector.
14420       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14421                                                : ISD::BUILD_VECTOR,
14422                               DL, StoreTy, Ops);
14423     }
14424   } else {
14425     // We should always use a vector store when merging extracted vector
14426     // elements, so this path implies a store of constants.
14427     assert(IsConstantSrc && "Merged vector elements should use vector store");
14428
14429     APInt StoreInt(SizeInBits, 0);
14430
14431     // Construct a single integer constant which is made of the smaller
14432     // constant inputs.
14433     bool IsLE = DAG.getDataLayout().isLittleEndian();
14434     for (unsigned i = 0; i < NumStores; ++i) {
14435       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14436       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14437
14438       SDValue Val = St->getValue();
14439       Val = peekThroughBitcasts(Val);
14440       StoreInt <<= ElementSizeBits;
14441       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14442         StoreInt |= C->getAPIntValue()
14443                         .zextOrTrunc(ElementSizeBits)
14444                         .zextOrTrunc(SizeInBits);
14445       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14446         StoreInt |= C->getValueAPF()
14447                         .bitcastToAPInt()
14448                         .zextOrTrunc(ElementSizeBits)
14449                         .zextOrTrunc(SizeInBits);
14450         // If fp truncation is necessary give up for now.
14451         if (MemVT.getSizeInBits() != ElementSizeBits)
14452           return false;
14453       } else {
14454         llvm_unreachable("Invalid constant element type");
14455       }
14456     }
14457
14458     // Create the new Load and Store operations.
14459     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14460   }
14461
14462   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14463   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14464
14465   // make sure we use trunc store if it's necessary to be legal.
14466   SDValue NewStore;
14467   if (!UseTrunc) {
14468     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14469                             FirstInChain->getPointerInfo(),
14470                             FirstInChain->getAlignment());
14471   } else { // Must be realized as a trunc store
14472     EVT LegalizedStoredValTy =
14473         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14474     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14475     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14476     SDValue ExtendedStoreVal =
14477         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14478                         LegalizedStoredValTy);
14479     NewStore = DAG.getTruncStore(
14480         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14481         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14482         FirstInChain->getAlignment(),
14483         FirstInChain->getMemOperand()->getFlags());
14484   }
14485
14486   // Replace all merged stores with the new store.
14487   for (unsigned i = 0; i < NumStores; ++i)
14488     CombineTo(StoreNodes[i].MemNode, NewStore);
14489
14490   AddToWorklist(NewChain.getNode());
14491   return true;
14492 }
14493
// Collects into StoreNodes the stores that are candidates for being merged
// with St, each wrapped in a MemOpLink together with the pointer difference
// reported by BaseIndexOffset::equalBaseIndex against St's address.
//
// \param St          The store whose neighbours are searched for.
// \param StoreNodes  Output: candidates are appended in use-list order (the
//                    list is not sorted here).
// \param RootNode    Output: the chain node whose users were scanned. It is
//                    St's chain, or, when that chain is a load, the load's
//                    chain.
//
// Returns early, leaving both outputs untouched, when St's address has no
// base node, when the base is undef, or when St stores a loaded value that
// fails the load checks below.
14494 void DAGCombiner::getStoreMergeCandidates(
14495     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14496     SDNode *&RootNode) {
14497   // This holds the base pointer, index, and the offset in bytes from the base
14498   // pointer.
14499   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14500   EVT MemVT = St->getMemoryVT();
14501
14502   SDValue Val = peekThroughBitcasts(St->getValue());
14503   // We must have a base and an offset.
14504   if (!BasePtr.getBase().getNode())
14505     return;
14506
14507   // Do not handle stores to undef base pointers.
14508   if (BasePtr.getBase().isUndef())
14509     return;
14510
        // Classify the value stored by St (looking through bitcasts). The same
        // classification is required of every candidate in CandidateMatch.
14511   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14512   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14513                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14514   bool IsLoadSrc = isa<LoadSDNode>(Val);
14515   BaseIndexOffset LBasePtr;
14516   // Match on loadbaseptr if relevant.
14517   EVT LoadVT;
14518   if (IsLoadSrc) {
14519     auto *Ld = cast<LoadSDNode>(Val);
14520     LBasePtr = BaseIndexOffset::match(Ld, DAG);
14521     LoadVT = Ld->getMemoryVT();
14522     // Load and store should be the same type.
14523     if (MemVT != LoadVT)
14524       return;
14525     // Loads must only have one use.
14526     if (!Ld->hasNUsesOfValue(1, 0))
14527       return;
14528     // The memory operands must not be volatile or indexed.
14529     if (Ld->isVolatile() || Ld->isIndexed())
14530       return;
14531   }
        // Decides whether Other may be merged with St. On success, Ptr holds
        // Other's decomposed address and the result of
        // BasePtr.equalBaseIndex(Ptr, DAG, Offset) is returned (Offset is
        // presumably filled with the byte distance from St's address; confirm
        // against BaseIndexOffset).
14532   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14533                             int64_t &Offset) -> bool {
14534     if (Other->isVolatile() || Other->isIndexed())
14535       return false;
          // Note: this Val shadows the outer Val; it is Other's stored value.
14536     SDValue Val = peekThroughBitcasts(Other->getValue());
14537     // Allow merging constants of different types as integers.
14538     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14539                                            : Other->getMemoryVT() != MemVT;
14540     if (IsLoadSrc) {
14541       if (NoTypeMatch)
14542         return false;
14543       // The Load's Base Ptr must also match
14544       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14545         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14546         if (LoadVT != OtherLd->getMemoryVT())
14547           return false;
14548         // Loads must only have one use.
14549         if (!OtherLd->hasNUsesOfValue(1, 0))
14550           return false;
14551         // The memory operands must not be volatile or indexed.
14552         if (OtherLd->isVolatile() || OtherLd->isIndexed())
14553           return false;
14554         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14555           return false;
14556       } else
14557         return false;
14558     }
14559     if (IsConstantSrc) {
14560       if (NoTypeMatch)
14561         return false;
14562       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14563         return false;
14564     }
14565     if (IsExtractVecSrc) {
14566       // Do not merge truncated stores here.
14567       if (Other->isTruncatingStore())
14568         return false;
14569       if (!MemVT.bitsEq(Val.getValueType()))
14570         return false;
14571       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14572           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14573         return false;
14574     }
14575     Ptr = BaseIndexOffset::match(Other, DAG);
14576     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14577   };
14578
14579   // We are looking for a root node which is an ancestor to all mergeable
14580   // stores. We search up through a load, to our root and then down
14581   // through all children. For instance we will find Store{1,2,3} if
14582   // St is Store1, Store2, or Store3 where the root is not a load,
14583   // which is always true for nonvolatile ops. TODO: Expand
14584   // the search to find all valid candidates through multiple layers of loads.
14585   //
14586   // Root
14587   // |-------|-------|
14588   // Load    Load    Store3
14589   // |       |
14590   // Store1   Store2
14591   //
14592   // FIXME: We should be able to climb and
14593   // descend TokenFactors to find candidates as well.
14594
14595   RootNode = St->getChain().getNode();
14596
14597   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
          // St is chained to a load: step up to the load's chain, then visit
          // every store that is chained (operand 0) to a load which is itself
          // chained (operand 0) to that root.
14598     RootNode = Ldn->getChain().getNode();
14599     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14600       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14601         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14602           if (I2.getOperandNo() == 0)
14603             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14604               BaseIndexOffset Ptr;
14605               int64_t PtrDiff;
14606               if (CandidateMatch(OtherST, Ptr, PtrDiff))
14607                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14608             }
14609   } else
          // Otherwise only stores chained directly (operand 0) to the root are
          // considered.
14610     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14611       if (I.getOperandNo() == 0)
14612         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14613           BaseIndexOffset Ptr;
14614           int64_t PtrDiff;
14615           if (CandidateMatch(OtherST, Ptr, PtrDiff))
14616             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14617         }
14618 }
14619
14620 // We need to check that merging these stores does not cause a loop in
14621 // the DAG. Any store candidate may depend on another candidate
14622 // indirectly through its operand (we already consider dependencies
14623 // through the chain). Check in parallel by searching up from
14624 // non-chain operands of candidates.
//
// \param StoreNodes  Candidate stores; only the first NumStores are examined.
// \param NumStores   Number of leading candidates that would be merged.
// \param RootNode    Common chain ancestor of the candidates; the search is
//                    pruned at it (and at the operands of any TokenFactor
//                    reached from it).
// \returns false if SDNode::hasPredecessorHelper reports one of the examined
//          stores as a predecessor of the seeded operands, true otherwise.
14625 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14626     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14627     SDNode *RootNode) {
14628   // FIXME: We should be able to truncate a full search of
14629   // predecessors by doing a BFS and keeping tabs on the originating
14630   // stores that worklist nodes come from, in a similar way to
14631   // TokenFactor simplification.
14632
14633   SmallPtrSet<const SDNode *, 32> Visited;
14634   SmallVector<const SDNode *, 8> Worklist;
14635
14636   // RootNode is a predecessor to all candidates so we need not search
14637   // past it. Add RootNode (peeking through TokenFactors). Do not count
14638   // these towards size check.
14639
14640   Worklist.push_back(RootNode);
14641   while (!Worklist.empty()) {
14642     auto N = Worklist.pop_back_val();
14643     if (!Visited.insert(N).second)
14644       continue; // Already present in Visited.
14645     if (N->getOpcode() == ISD::TokenFactor) {
14646       for (SDValue Op : N->ops())
14647         Worklist.push_back(Op.getNode());
14648     }
14649   }
14650
14651   // Don't count pruning nodes towards max.
        // The budget handed to hasPredecessorHelper is 1024 on top of the
        // nodes already marked visited above.
14652   unsigned int Max = 1024 + Visited.size();
14653   // Search Ops of store candidates.
14654   for (unsigned i = 0; i < NumStores; ++i) {
14655     SDNode *N = StoreNodes[i].MemNode;
14656     // Of the 4 Store Operands:
14657     //   * Chain (Op 0) -> We have already considered these
14658     //                    in candidate selection and can be
14659     //                    safely ignored
14660     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14661     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14662     //                       but aren't necessarily from the same base node, so
14663     //                       cycles possible (e.g. via indexed store).
14664     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14665     //               non-indexed stores). Not constant on all targets (e.g. ARM)
14666     //               and so can participate in a cycle.
14667     for (unsigned j = 1; j < N->getNumOperands(); ++j)
14668       Worklist.push_back(N->getOperand(j).getNode());
14669   }
14670   // Search through DAG. We can stop early if we find a store node.
        // Visited and Worklist are shared across the calls, so work done for
        // one candidate is reused for the next. NOTE(review): presumably
        // hasPredecessorHelper answers conservatively (true) once Max is
        // exhausted; confirm against SelectionDAGNodes.h.
14671   for (unsigned i = 0; i < NumStores; ++i)
14672     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14673                                      Max))
14674       return false;
14675   return true;
14676 }
14677
// Tries to replace St and other stores to adjacent memory with fewer, wider
// stores. Candidates come from getStoreMergeCandidates; they are sorted by
// offset and processed in runs whose offsets are exactly one element size
// (MemVT's store size) apart. Three kinds of stored value are handled:
//   * constants            -> one integer, truncating-integer or vector store,
//   * extracted vector
//     elements/subvectors  -> one vector store,
//   * loads                -> one wide load feeding one wide store.
// Every proposed merge is checked with
// checkMergeStoreCandidatesForDependencies before it is performed.
//
// \param St  The store that triggers the search.
// \returns true if at least one merge was performed. Always false at
//          CodeGenOpt::None, when two elements would already exceed
//          MaximumLegalStoreInBits, for non-byte-sized or non-simple memory
//          types, and when the stored value is not a constant, load or
//          vector extract.
14678 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14679   if (OptLevel == CodeGenOpt::None)
14680     return false;
14681
14682   EVT MemVT = St->getMemoryVT();
14683   int64_t ElementSizeBytes = MemVT.getStoreSize();
14684   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14685
        // Even the smallest possible merge (two elements) would be too wide.
14686   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14687     return false;
14688
14689   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14690       Attribute::NoImplicitFloat);
14691
14692   // This function cannot currently deal with non-byte-sized memory sizes.
14693   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14694     return false;
14695
14696   if (!MemVT.isSimple())
14697     return false;
14698
14699   // Perform an early exit check. Do not bother looking at stored values that
14700   // are not constants, loads, or extracted vector elements.
14701   SDValue StoredVal = peekThroughBitcasts(St->getValue());
14702   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14703   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14704                        isa<ConstantFPSDNode>(StoredVal);
14705   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14706                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14707
14708   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14709     return false;
14710
14711   SmallVector<MemOpLink, 8> StoreNodes;
14712   SDNode *RootNode;
14713   // Find potential store merge candidates by searching through chain sub-DAG
14714   getStoreMergeCandidates(St, StoreNodes, RootNode);
14715
14716   // Check if there is anything to merge.
14717   if (StoreNodes.size() < 2)
14718     return false;
14719
14720   // Sort the memory operands according to their distance from the
14721   // base pointer.
14722   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14723     return LHS.OffsetFromBase < RHS.OffsetFromBase;
14724   });
14725
14726   // Store Merge attempts to merge the lowest stores. This generally
14727   // works out when successful, as the remaining stores are checked
14728   // after the first collection of stores is merged. However, in the
14729   // case that a non-mergeable store is found first, e.g., {p[-2],
14730   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14731   // mergeable cases. To prevent this, we prune such stores from the
14732   // front of StoreNodes here.
14733
        // RV records whether any merge has happened so far.
14734   bool RV = false;
14735   while (StoreNodes.size() > 1) {
          // Skip leading stores that are not immediately followed by a store
          // one element further on.
14736     unsigned StartIdx = 0;
14737     while ((StartIdx + 1 < StoreNodes.size()) &&
14738            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14739                StoreNodes[StartIdx + 1].OffsetFromBase)
14740       ++StartIdx;
14741
14742     // Bail if we don't have enough candidates to merge.
14743     if (StartIdx + 1 >= StoreNodes.size())
14744       return RV;
14745
14746     if (StartIdx)
14747       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14748
14749     // Scan the memory operations on the chain and find the first
14750     // non-consecutive store memory address.
14751     unsigned NumConsecutiveStores = 1;
14752     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14753     // Check that the addresses are consecutive starting from the second
14754     // element in the list of stores.
14755     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14756       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14757       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14758         break;
14759       NumConsecutiveStores = i + 1;
14760     }
14761
14762     if (NumConsecutiveStores < 2) {
14763       StoreNodes.erase(StoreNodes.begin(),
14764                        StoreNodes.begin() + NumConsecutiveStores);
14765       continue;
14766     }
14767
14768     // Context and data layout used by the legality queries below.
14769     LLVMContext &Context = *DAG.getContext();
14770     const DataLayout &DL = DAG.getDataLayout();
14771
14772     // Store the constants into memory as one consecutive store.
14773     if (IsConstantSrc) {
14774       while (NumConsecutiveStores >= 2) {
14775         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14776         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14777         unsigned FirstStoreAlign = FirstInChain->getAlignment();
              // The LastLegal* values are element counts (not indices); 1
              // means "no merge found".
14778         unsigned LastLegalType = 1;
14779         unsigned LastLegalVectorType = 1;
14780         bool LastIntegerTrunc = false;
14781         bool NonZero = false;
14782         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14783         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14784           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14785           SDValue StoredVal = ST->getValue();
14786           bool IsElementZero = false;
14787           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14788             IsElementZero = C->isNullValue();
14789           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14790             IsElementZero = C->getConstantFPValue()->isNullValue();
14791           if (IsElementZero) {
14792             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14793               FirstZeroAfterNonZero = i;
14794           }
14795           NonZero |= !IsElementZero;
14796
14797           // Find a legal type for the constant store.
14798           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14799           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14800           bool IsFast = false;
14801
14802           // Break early when size is too large to be legal.
14803           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14804             break;
14805
14806           if (TLI.isTypeLegal(StoreTy) &&
14807               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14808               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14809                                      FirstStoreAlign, &IsFast) &&
14810               IsFast) {
14811             LastIntegerTrunc = false;
14812             LastLegalType = i + 1;
14813             // Or check whether a truncstore is legal.
14814           } else if (TLI.getTypeAction(Context, StoreTy) ==
14815                      TargetLowering::TypePromoteInteger) {
14816             EVT LegalizedStoredValTy =
14817                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14818             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14819                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14820                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14821                                        FirstStoreAlign, &IsFast) &&
14822                 IsFast) {
14823               LastIntegerTrunc = true;
14824               LastLegalType = i + 1;
14825             }
14826           }
14827
14828           // We only use vectors if the constant is known to be zero or the
14829           // target allows it and the function is not marked with the
14830           // noimplicitfloat attribute.
14831           if ((!NonZero ||
14832                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14833               !NoVectors) {
14834             // Find a legal type for the vector store.
14835             unsigned Elts = (i + 1) * NumMemElts;
14836             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14837             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14838                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14839                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14840                                        FirstStoreAlign, &IsFast) &&
14841                 IsFast)
14842               LastLegalVectorType = i + 1;
14843           }
14844         }
14845
14846         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14847         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14848
14849         // Check if we found a legal integer type that creates a meaningful
14850         // merge.
14851         if (NumElem < 2) {
14852           // We know that candidate stores are in order and of correct
14853           // shape. While there is no mergeable sequence from the
14854           // beginning one may start later in the sequence. The only
14855           // reason a merge of size N could have failed where another of
14856           // the same size would not have, is if the alignment has
14857           // improved or we've dropped a non-zero value. Drop as many
14858           // candidates as we can here.
14859           unsigned NumSkip = 1;
14860           while (
14861               (NumSkip < NumConsecutiveStores) &&
14862               (NumSkip < FirstZeroAfterNonZero) &&
14863               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14864             NumSkip++;
14865
14866           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14867           NumConsecutiveStores -= NumSkip;
14868           continue;
14869         }
14870
14871         // Check that we can merge these candidates without causing a cycle.
14872         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14873                                                       RootNode)) {
14874           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14875           NumConsecutiveStores -= NumElem;
14876           continue;
14877         }
14878
14879         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14880                                               UseVector, LastIntegerTrunc);
14881
14882         // Remove merged stores for next iteration.
14883         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14884         NumConsecutiveStores -= NumElem;
14885       }
14886       continue;
14887     }
14888
14889     // When extracting multiple vector elements, try to store them
14890     // in one vector store rather than a sequence of scalar stores.
14891     if (IsExtractVecSrc) {
14892       // Loop on Consecutive Stores on success.
14893       while (NumConsecutiveStores >= 2) {
14894         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14895         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14896         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14897         unsigned NumStoresToMerge = 1;
14898         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14899           // Find a legal type for the vector store.
14900           unsigned Elts = (i + 1) * NumMemElts;
14901           EVT Ty =
14902               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
                // Start from "not fast" (as the constant path above does) so
                // that a target hook which reports the access as allowed
                // without writing the flag is never read as an indeterminate
                // value.
14903           bool IsFast = false;
14904
14905           // Break early when size is too large to be legal.
14906           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14907             break;
14908
14909           if (TLI.isTypeLegal(Ty) &&
14910               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14911               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14912                                      FirstStoreAlign, &IsFast) &&
14913               IsFast)
14914             NumStoresToMerge = i + 1;
14915         }
14916
14917         // Check if we found a legal vector type creating a meaningful
14918         // merge.
14919         if (NumStoresToMerge < 2) {
14920           // We know that candidate stores are in order and of correct
14921           // shape. While there is no mergeable sequence from the
14922           // beginning one may start later in the sequence. The only
14923           // reason a merge of size N could have failed where another of
14924           // the same size would not have, is if the alignment has
14925           // improved. Drop as many candidates as we can here.
14926           unsigned NumSkip = 1;
14927           while (
14928               (NumSkip < NumConsecutiveStores) &&
14929               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14930             NumSkip++;
14931
14932           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14933           NumConsecutiveStores -= NumSkip;
14934           continue;
14935         }
14936
14937         // Check that we can merge these candidates without causing a cycle.
14938         if (!checkMergeStoreCandidatesForDependencies(
14939                 StoreNodes, NumStoresToMerge, RootNode)) {
14940           StoreNodes.erase(StoreNodes.begin(),
14941                            StoreNodes.begin() + NumStoresToMerge);
14942           NumConsecutiveStores -= NumStoresToMerge;
14943           continue;
14944         }
14945
14946         RV |= MergeStoresOfConstantsOrVecElts(
14947             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14948
14949         StoreNodes.erase(StoreNodes.begin(),
14950                          StoreNodes.begin() + NumStoresToMerge);
14951         NumConsecutiveStores -= NumStoresToMerge;
14952       }
14953       continue;
14954     }
14955
14956     // Below we handle the case of multiple consecutive stores that
14957     // come from multiple consecutive loads. We merge them into a single
14958     // wide load and a single wide store.
14959
14960     // Look for load nodes which are used by the stored values.
14961     SmallVector<MemOpLink, 8> LoadNodes;
14962
14963     // Find acceptable loads. Loads need to have the same chain (token factor),
14964     // must not be zext, volatile, indexed, and they must be consecutive.
14965     BaseIndexOffset LdBasePtr;
14966
14967     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14968       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14969       SDValue Val = peekThroughBitcasts(St->getValue());
14970       LoadSDNode *Ld = cast<LoadSDNode>(Val);
14971
14972       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14973       // If this is not the first ptr that we check.
14974       int64_t LdOffset = 0;
14975       if (LdBasePtr.getBase().getNode()) {
14976         // The base ptr must be the same.
14977         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14978           break;
14979       } else {
14980         // Check that all other base pointers are the same as this one.
14981         LdBasePtr = LdPtr;
14982       }
14983
14984       // We found a potential memory operand to merge.
14985       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14986     }
14987
          // LoadNodes[i] is the load feeding StoreNodes[i]; the two lists are
          // erased in lockstep below.
14988     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14989       // If we have load/store pair instructions and we only have two values,
14990       // don't bother merging.
14991       unsigned RequiredAlignment;
14992       if (LoadNodes.size() == 2 &&
14993           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14994           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14995         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14996         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14997         break;
14998       }
14999       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15000       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15001       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15002       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15003       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
15004       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15005
15006       // Scan the memory operations on the chain and find the first
15007       // non-consecutive load memory address. These variables hold the index in
15008       // the store node array.
15009
15010       unsigned LastConsecutiveLoad = 1;
15011
15012       // This variable refers to the size and not index in the array.
15013       unsigned LastLegalVectorType = 1;
15014       unsigned LastLegalIntegerType = 1;
15015       bool isDereferenceable = true;
15016       bool DoIntegerTruncate = false;
15017       StartAddress = LoadNodes[0].OffsetFromBase;
15018       SDValue FirstChain = FirstLoad->getChain();
15019       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15020         // All loads must share the same chain.
15021         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15022           break;
15023
15024         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15025         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15026           break;
15027         LastConsecutiveLoad = i;
15028
15029         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15030           isDereferenceable = false;
15031
15032         // Find a legal type for the vector store.
15033         unsigned Elts = (i + 1) * NumMemElts;
15034         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15035
15036         // Break early when size is too large to be legal.
15037         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15038           break;
15039
              // Start from "not fast" so that a target hook which reports the
              // access as allowed without writing the flag is never read as
              // an indeterminate value.
15040         bool IsFastSt = false, IsFastLd = false;
15041         if (TLI.isTypeLegal(StoreTy) &&
15042             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15043             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15044                                    FirstStoreAlign, &IsFastSt) &&
15045             IsFastSt &&
15046             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15047                                    FirstLoadAlign, &IsFastLd) &&
15048             IsFastLd) {
15049           LastLegalVectorType = i + 1;
15050         }
15051
15052         // Find a legal type for the integer store.
15053         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15054         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15055         if (TLI.isTypeLegal(StoreTy) &&
15056             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15057             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15058                                    FirstStoreAlign, &IsFastSt) &&
15059             IsFastSt &&
15060             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15061                                    FirstLoadAlign, &IsFastLd) &&
15062             IsFastLd) {
15063           LastLegalIntegerType = i + 1;
15064           DoIntegerTruncate = false;
15065           // Or check whether a truncstore and extload is legal.
15066         } else if (TLI.getTypeAction(Context, StoreTy) ==
15067                    TargetLowering::TypePromoteInteger) {
15068           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15069           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15070               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15071               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15072                                  StoreTy) &&
15073               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15074                                  StoreTy) &&
15075               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15076               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15077                                      FirstStoreAlign, &IsFastSt) &&
15078               IsFastSt &&
15079               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15080                                      FirstLoadAlign, &IsFastLd) &&
15081               IsFastLd) {
15082             LastLegalIntegerType = i + 1;
15083             DoIntegerTruncate = true;
15084           }
15085         }
15086       }
15087
15088       // Only use vector types if the vector type is larger than the integer
15089       // type. If they are the same, use integers.
15090       bool UseVectorTy =
15091           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15092       unsigned LastLegalType =
15093           std::max(LastLegalVectorType, LastLegalIntegerType);
15094
15095       // We add +1 here because the LastXXX variables refer to location while
15096       // the NumElem refers to array/index size.
15097       unsigned NumElem =
15098           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15099       NumElem = std::min(LastLegalType, NumElem);
15100
15101       if (NumElem < 2) {
15102         // We know that candidate stores are in order and of correct
15103         // shape. While there is no mergeable sequence from the
15104         // beginning one may start later in the sequence. The only
15105         // reason a merge of size N could have failed where another of
15106         // the same size would not have is if the alignment of either
15107         // the load or store has improved. Drop as many candidates as we
15108         // can here.
15109         unsigned NumSkip = 1;
15110         while ((NumSkip < LoadNodes.size()) &&
15111                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15112                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15113           NumSkip++;
15114         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15115         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15116         NumConsecutiveStores -= NumSkip;
15117         continue;
15118       }
15119
15120       // Check that we can merge these candidates without causing a cycle.
15121       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15122                                                     RootNode)) {
15123         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15124         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15125         NumConsecutiveStores -= NumElem;
15126         continue;
15127       }
15128
15129       // Find if it is better to use vectors or integers to load and store
15130       // to memory.
15131       EVT JointMemOpVT;
15132       if (UseVectorTy) {
15133         // Find a legal type for the vector store.
15134         unsigned Elts = NumElem * NumMemElts;
15135         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15136       } else {
15137         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15138         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15139       }
15140
15141       SDLoc LoadDL(LoadNodes[0].MemNode);
15142       SDLoc StoreDL(StoreNodes[0].MemNode);
15143
15144       // The merged loads are required to have the same incoming chain, so
15145       // using the first's chain is acceptable.
15146
15147       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15148       AddToWorklist(NewStoreChain.getNode());
15149
            // The wide load is marked dereferenceable only if every load
            // examined by the scan above was.
15150       MachineMemOperand::Flags MMOFlags =
15151           isDereferenceable ? MachineMemOperand::MODereferenceable
15152                             : MachineMemOperand::MONone;
15153
15154       SDValue NewLoad, NewStore;
15155       if (UseVectorTy || !DoIntegerTruncate) {
15156         NewLoad =
15157             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15158                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15159                         FirstLoadAlign, MMOFlags);
15160         NewStore = DAG.getStore(
15161             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15162             FirstInChain->getPointerInfo(), FirstStoreAlign);
15163       } else { // This must be the truncstore/extload case
15164         EVT ExtendedTy =
15165             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15166         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15167                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15168                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15169                                  FirstLoadAlign, MMOFlags);
15170         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15171                                      FirstInChain->getBasePtr(),
15172                                      FirstInChain->getPointerInfo(),
15173                                      JointMemOpVT, FirstInChain->getAlignment(),
15174                                      FirstInChain->getMemOperand()->getFlags());
15175       }
15176
15177       // Transfer chain users from old loads to the new load.
15178       for (unsigned i = 0; i < NumElem; ++i) {
15179         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15180         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15181                                       SDValue(NewLoad.getNode(), 1));
15182       }
15183
15184       // Replace all the stores with the new store. Recursively remove the
15185       // corresponding value if it is no longer used.
15186       for (unsigned i = 0; i < NumElem; ++i) {
15187         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15188         CombineTo(StoreNodes[i].MemNode, NewStore);
15189         if (Val.getNode()->use_empty())
15190           recursivelyDeleteUnusedNodes(Val.getNode());
15191       }
15192
15193       RV = true;
15194       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15195       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15196       NumConsecutiveStores -= NumElem;
15197     }
15198   }
15199   return RV;
15200 }
15201
15202 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15203   SDLoc SL(ST);
15204   SDValue ReplStore;
15205
15206   // Replace the chain to avoid dependency.
15207   if (ST->isTruncatingStore()) {
15208     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15209                                   ST->getBasePtr(), ST->getMemoryVT(),
15210                                   ST->getMemOperand());
15211   } else {
15212     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15213                              ST->getMemOperand());
15214   }
15215
15216   // Create token to keep both nodes around.
15217   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15218                               MVT::Other, ST->getChain(), ReplStore);
15219
15220   // Make sure the new and old chains are cleaned up.
15221   AddToWorklist(Token.getNode());
15222
15223   // Don't add users to work list.
15224   return CombineTo(ST, Token, false);
15225 }
15226
15227 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15228   SDValue Value = ST->getValue();
15229   if (Value.getOpcode() == ISD::TargetConstantFP)
15230     return SDValue();
15231
15232   SDLoc DL(ST);
15233
15234   SDValue Chain = ST->getChain();
15235   SDValue Ptr = ST->getBasePtr();
15236
15237   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15238
15239   // NOTE: If the original store is volatile, this transform must not increase
15240   // the number of stores.  For example, on x86-32 an f64 can be stored in one
15241   // processor operation but an i64 (which is not legal) requires two.  So the
15242   // transform should not be done in this case.
15243
15244   SDValue Tmp;
15245   switch (CFP->getSimpleValueType(0).SimpleTy) {
15246   default:
15247     llvm_unreachable("Unknown FP type");
15248   case MVT::f16:    // We don't do this for these yet.
15249   case MVT::f80:
15250   case MVT::f128:
15251   case MVT::ppcf128:
15252     return SDValue();
15253   case MVT::f32:
15254     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15255         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15256       ;
15257       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15258                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15259                             MVT::i32);
15260       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15261     }
15262
15263     return SDValue();
15264   case MVT::f64:
15265     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15266          !ST->isVolatile()) ||
15267         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15268       ;
15269       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15270                             getZExtValue(), SDLoc(CFP), MVT::i64);
15271       return DAG.getStore(Chain, DL, Tmp,
15272                           Ptr, ST->getMemOperand());
15273     }
15274
15275     if (!ST->isVolatile() &&
15276         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15277       // Many FP stores are not made apparent until after legalize, e.g. for
15278       // argument passing.  Since this is so common, custom legalize the
15279       // 64-bit integer store into two 32-bit stores.
15280       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15281       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15282       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15283       if (DAG.getDataLayout().isBigEndian())
15284         std::swap(Lo, Hi);
15285
15286       unsigned Alignment = ST->getAlignment();
15287       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15288       AAMDNodes AAInfo = ST->getAAInfo();
15289
15290       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15291                                  ST->getAlignment(), MMOFlags, AAInfo);
15292       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15293                         DAG.getConstant(4, DL, Ptr.getValueType()));
15294       Alignment = MinAlign(Alignment, 4U);
15295       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15296                                  ST->getPointerInfo().getWithOffset(4),
15297                                  Alignment, MMOFlags, AAInfo);
15298       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15299                          St0, St1);
15300     }
15301
15302     return SDValue();
15303   }
15304 }
15305
15306 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15307   StoreSDNode *ST  = cast<StoreSDNode>(N);
15308   SDValue Chain = ST->getChain();
15309   SDValue Value = ST->getValue();
15310   SDValue Ptr   = ST->getBasePtr();
15311
15312   // If this is a store of a bit convert, store the input value if the
15313   // resultant store does not need a higher alignment than the original.
15314   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15315       ST->isUnindexed()) {
15316     EVT SVT = Value.getOperand(0).getValueType();
15317     // If the store is volatile, we only want to change the store type if the
15318     // resulting store is legal. Otherwise we might increase the number of
15319     // memory accesses. We don't care if the original type was legal or not
15320     // as we assume software couldn't rely on the number of accesses of an
15321     // illegal type.
15322     if (((!LegalOperations && !ST->isVolatile()) ||
15323          TLI.isOperationLegal(ISD::STORE, SVT)) &&
15324         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15325       unsigned OrigAlign = ST->getAlignment();
15326       bool Fast = false;
15327       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15328                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
15329           Fast) {
15330         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15331                             ST->getPointerInfo(), OrigAlign,
15332                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
15333       }
15334     }
15335   }
15336
15337   // Turn 'store undef, Ptr' -> nothing.
15338   if (Value.isUndef() && ST->isUnindexed())
15339     return Chain;
15340
15341   // Try to infer better alignment information than the store already has.
15342   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15343     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15344       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15345         SDValue NewStore =
15346             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15347                               ST->getMemoryVT(), Align,
15348                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
15349         // NewStore will always be N as we are only refining the alignment
15350         assert(NewStore.getNode() == N);
15351         (void)NewStore;
15352       }
15353     }
15354   }
15355
15356   // Try transforming a pair floating point load / store ops to integer
15357   // load / store ops.
15358   if (SDValue NewST = TransformFPLoadStorePair(N))
15359     return NewST;
15360
15361   if (ST->isUnindexed()) {
15362     // Walk up chain skipping non-aliasing memory nodes, on this store and any
15363     // adjacent stores.
15364     if (findBetterNeighborChains(ST)) {
15365       // replaceStoreChain uses CombineTo, which handled all of the worklist
15366       // manipulation. Return the original node to not do anything else.
15367       return SDValue(ST, 0);
15368     }
15369     Chain = ST->getChain();
15370   }
15371
15372   // FIXME: is there such a thing as a truncating indexed store?
15373   if (ST->isTruncatingStore() && ST->isUnindexed() &&
15374       Value.getValueType().isInteger() &&
15375       (!isa<ConstantSDNode>(Value) ||
15376        !cast<ConstantSDNode>(Value)->isOpaque())) {
15377     // See if we can simplify the input to this truncstore with knowledge that
15378     // only the low bits are being used.  For example:
15379     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
15380     SDValue Shorter = DAG.GetDemandedBits(
15381         Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15382                                     ST->getMemoryVT().getScalarSizeInBits()));
15383     AddToWorklist(Value.getNode());
15384     if (Shorter.getNode())
15385       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
15386                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
15387
15388     // Otherwise, see if we can simplify the operation with
15389     // SimplifyDemandedBits, which only works if the value has a single use.
15390     if (SimplifyDemandedBits(
15391             Value,
15392             APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15393                                  ST->getMemoryVT().getScalarSizeInBits()))) {
15394       // Re-visit the store if anything changed and the store hasn't been merged
15395       // with another node (N is deleted) SimplifyDemandedBits will add Value's
15396       // node back to the worklist if necessary, but we also need to re-visit
15397       // the Store node itself.
15398       if (N->getOpcode() != ISD::DELETED_NODE)
15399         AddToWorklist(N);
15400       return SDValue(N, 0);
15401     }
15402   }
15403
15404   // If this is a load followed by a store to the same location, then the store
15405   // is dead/noop.
15406   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15407     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15408         ST->isUnindexed() && !ST->isVolatile() &&
15409         // There can't be any side effects between the load and store, such as
15410         // a call or store.
15411         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15412       // The store is dead, remove it.
15413       return Chain;
15414     }
15415   }
15416
15417   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15418     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15419         !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
15420         ST->getMemoryVT() == ST1->getMemoryVT()) {
15421       // If this is a store followed by a store with the same value to the same
15422       // location, then the store is dead/noop.
15423       if (ST1->getValue() == Value) {
15424         // The store is dead, remove it.
15425         return Chain;
15426       }
15427
15428       // If this is a store who's preceeding store to the same location
15429       // and no one other node is chained to that store we can effectively
15430       // drop the store. Do not remove stores to undef as they may be used as
15431       // data sinks.
15432       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15433           !ST1->getBasePtr().isUndef()) {
15434         // ST1 is fully overwritten and can be elided. Combine with it's chain
15435         // value.
15436         CombineTo(ST1, ST1->getChain());
15437         return SDValue();
15438       }
15439     }
15440   }
15441
15442   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15443   // truncating store.  We can do this even if this is already a truncstore.
15444   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15445       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15446       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15447                             ST->getMemoryVT())) {
15448     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15449                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
15450   }
15451
15452   // Always perform this optimization before types are legal. If the target
15453   // prefers, also try this after legalization to catch stores that were created
15454   // by intrinsics or other nodes.
15455   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15456     while (true) {
15457       // There can be multiple store sequences on the same chain.
15458       // Keep trying to merge store sequences until we are unable to do so
15459       // or until we merge the last store on the chain.
15460       bool Changed = MergeConsecutiveStores(ST);
15461       if (!Changed) break;
15462       // Return N as merge only uses CombineTo and no worklist clean
15463       // up is necessary.
15464       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15465         return SDValue(N, 0);
15466     }
15467   }
15468
15469   // Try transforming N to an indexed store.
15470   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15471     return SDValue(N, 0);
15472
15473   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15474   //
15475   // Make sure to do this only after attempting to merge stores in order to
15476   //  avoid changing the types of some subset of stores due to visit order,
15477   //  preventing their merging.
15478   if (isa<ConstantFPSDNode>(ST->getValue())) {
15479     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15480       return NewSt;
15481   }
15482
15483   if (SDValue NewSt = splitMergedValStore(ST))
15484     return NewSt;
15485
15486   return ReduceLoadOpStoreWidth(N);
15487 }
15488
15489 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
15490   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
15491   if (!LifetimeEnd->hasOffset())
15492     return SDValue();
15493
15494   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
15495                                         LifetimeEnd->getOffset(), false);
15496
15497   // We walk up the chains to find stores.
15498   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
15499   while (!Chains.empty()) {
15500     SDValue Chain = Chains.back();
15501     Chains.pop_back();
15502     switch (Chain.getOpcode()) {
15503     case ISD::TokenFactor:
15504       for (unsigned Nops = Chain.getNumOperands(); Nops;)
15505         Chains.push_back(Chain.getOperand(--Nops));
15506       break;
15507     case ISD::LIFETIME_START:
15508     case ISD::LIFETIME_END: {
15509       // We can forward past any lifetime start/end that can be proven not to
15510       // alias the node.
15511       const auto *LifetimeStart = cast<LifetimeSDNode>(Chain);
15512       if (!LifetimeStart->hasOffset())
15513         break; // Be conservative if we don't know the extents of the object.
15514
15515       const BaseIndexOffset LifetimeStartBase(
15516           LifetimeStart->getOperand(1), SDValue(), LifetimeStart->getOffset(),
15517           false);
15518       bool IsAlias;
15519       if (BaseIndexOffset::computeAliasing(
15520               LifetimeEndBase, LifetimeEnd->getSize(), LifetimeStartBase,
15521               LifetimeStart->getSize(), DAG, IsAlias) &&
15522           !IsAlias) {
15523         Chains.push_back(Chain.getOperand(0));
15524       }
15525       break;
15526     }
15527
15528     case ISD::STORE: {
15529       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
15530       if (ST->isVolatile() || !ST->hasOneUse() || ST->isIndexed())
15531         continue;
15532       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
15533       // If we store purely within object bounds just before its lifetime ends,
15534       // we can remove the store.
15535       if (LifetimeEndBase.contains(LifetimeEnd->getSize(), StoreBase,
15536                                    ST->getMemoryVT().getStoreSize(), DAG)) {
15537         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
15538                    dbgs() << "\nwithin LIFETIME_END of : ";
15539                    LifetimeEndBase.dump(); dbgs() << "\n");
15540         CombineTo(ST, ST->getChain());
15541         return SDValue(N, 0);
15542       }
15543     }
15544     }
15545   }
15546   return SDValue();
15547 }
15548
15549 /// For the instruction sequence of store below, F and I values
15550 /// are bundled together as an i64 value before being stored into memory.
15551 /// Sometimes it is more efficent to generate separate stores for F and I,
15552 /// which can remove the bitwise instructions or sink them to colder places.
15553 ///
15554 ///   (store (or (zext (bitcast F to i32) to i64),
15555 ///              (shl (zext I to i64), 32)), addr)  -->
15556 ///   (store F, addr) and (store I, addr+4)
15557 ///
15558 /// Similarly, splitting for other merged store can also be beneficial, like:
15559 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15560 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15561 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15562 /// For pair of {i16, i8},  i32 store --> two i16 stores.
15563 /// For pair of {i8, i8},   i16 store --> two i8 stores.
15564 ///
15565 /// We allow each target to determine specifically which kind of splitting is
15566 /// supported.
15567 ///
15568 /// The store patterns are commonly seen from the simple code snippet below
15569 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
15570 ///   void goo(const std::pair<int, float> &);
15571 ///   hoo() {
15572 ///     ...
15573 ///     goo(std::make_pair(tmp, ftmp));
15574 ///     ...
15575 ///   }
15576 ///
15577 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15578   if (OptLevel == CodeGenOpt::None)
15579     return SDValue();
15580
15581   SDValue Val = ST->getValue();
15582   SDLoc DL(ST);
15583
15584   // Match OR operand.
15585   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15586     return SDValue();
15587
15588   // Match SHL operand and get Lower and Higher parts of Val.
15589   SDValue Op1 = Val.getOperand(0);
15590   SDValue Op2 = Val.getOperand(1);
15591   SDValue Lo, Hi;
15592   if (Op1.getOpcode() != ISD::SHL) {
15593     std::swap(Op1, Op2);
15594     if (Op1.getOpcode() != ISD::SHL)
15595       return SDValue();
15596   }
15597   Lo = Op2;
15598   Hi = Op1.getOperand(0);
15599   if (!Op1.hasOneUse())
15600     return SDValue();
15601
15602   // Match shift amount to HalfValBitSize.
15603   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15604   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15605   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15606     return SDValue();
15607
15608   // Lo and Hi are zero-extended from int with size less equal than 32
15609   // to i64.
15610   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15611       !Lo.getOperand(0).getValueType().isScalarInteger() ||
15612       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15613       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15614       !Hi.getOperand(0).getValueType().isScalarInteger() ||
15615       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15616     return SDValue();
15617
15618   // Use the EVT of low and high parts before bitcast as the input
15619   // of target query.
15620   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15621                   ? Lo.getOperand(0).getValueType()
15622                   : Lo.getValueType();
15623   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15624                    ? Hi.getOperand(0).getValueType()
15625                    : Hi.getValueType();
15626   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15627     return SDValue();
15628
15629   // Start to split store.
15630   unsigned Alignment = ST->getAlignment();
15631   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15632   AAMDNodes AAInfo = ST->getAAInfo();
15633
15634   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15635   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15636   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15637   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15638
15639   SDValue Chain = ST->getChain();
15640   SDValue Ptr = ST->getBasePtr();
15641   // Lower value store.
15642   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15643                              ST->getAlignment(), MMOFlags, AAInfo);
15644   Ptr =
15645       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15646                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15647   // Higher value store.
15648   SDValue St1 =
15649       DAG.getStore(St0, DL, Hi, Ptr,
15650                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15651                    Alignment / 2, MMOFlags, AAInfo);
15652   return St1;
15653 }
15654
15655 /// Convert a disguised subvector insertion into a shuffle:
15656 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15657 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15658 /// Note: We do not use an insert_subvector node because that requires a legal
15659 /// subvector type.
15660 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15661   SDValue InsertVal = N->getOperand(1);
15662   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15663       !InsertVal.getOperand(0).getValueType().isVector())
15664     return SDValue();
15665
15666   SDValue SubVec = InsertVal.getOperand(0);
15667   SDValue DestVec = N->getOperand(0);
15668   EVT SubVecVT = SubVec.getValueType();
15669   EVT VT = DestVec.getValueType();
15670   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15671   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15672   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15673
15674   // Step 1: Create a shuffle mask that implements this insert operation. The
15675   // vector that we are inserting into will be operand 0 of the shuffle, so
15676   // those elements are just 'i'. The inserted subvector is in the first
15677   // positions of operand 1 of the shuffle. Example:
15678   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15679   SmallVector<int, 16> Mask(NumMaskVals);
15680   for (unsigned i = 0; i != NumMaskVals; ++i) {
15681     if (i / NumSrcElts == InsIndex)
15682       Mask[i] = (i % NumSrcElts) + NumMaskVals;
15683     else
15684       Mask[i] = i;
15685   }
15686
15687   // Bail out if the target can not handle the shuffle we want to create.
15688   EVT SubVecEltVT = SubVecVT.getVectorElementType();
15689   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15690   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15691     return SDValue();
15692
15693   // Step 2: Create a wide vector from the inserted source vector by appending
15694   // undefined elements. This is the same size as our destination vector.
15695   SDLoc DL(N);
15696   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15697   ConcatOps[0] = SubVec;
15698   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15699
15700   // Step 3: Shuffle in the padded subvector.
15701   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15702   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15703   AddToWorklist(PaddedSubV.getNode());
15704   AddToWorklist(DestVecBC.getNode());
15705   AddToWorklist(Shuf.getNode());
15706   return DAG.getBitcast(VT, Shuf);
15707 }
15708
15709 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15710   SDValue InVec = N->getOperand(0);
15711   SDValue InVal = N->getOperand(1);
15712   SDValue EltNo = N->getOperand(2);
15713   SDLoc DL(N);
15714
15715   // If the inserted element is an UNDEF, just use the input vector.
15716   if (InVal.isUndef())
15717     return InVec;
15718
15719   EVT VT = InVec.getValueType();
15720   unsigned NumElts = VT.getVectorNumElements();
15721
15722   // Remove redundant insertions:
15723   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
15724   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15725       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
15726     return InVec;
15727
15728   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15729   if (!IndexC) {
15730     // If this is variable insert to undef vector, it might be better to splat:
15731     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
15732     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
15733       SmallVector<SDValue, 8> Ops(NumElts, InVal);
15734       return DAG.getBuildVector(VT, DL, Ops);
15735     }
15736     return SDValue();
15737   }
15738
15739   // We must know which element is being inserted for folds below here.
15740   unsigned Elt = IndexC->getZExtValue();
15741   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15742     return Shuf;
15743
15744   // Canonicalize insert_vector_elt dag nodes.
15745   // Example:
15746   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15747   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15748   //
15749   // Do this only if the child insert_vector node has one use; also
15750   // do this only if indices are both constants and Idx1 < Idx0.
15751   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15752       && isa<ConstantSDNode>(InVec.getOperand(2))) {
15753     unsigned OtherElt = InVec.getConstantOperandVal(2);
15754     if (Elt < OtherElt) {
15755       // Swap nodes.
15756       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15757                                   InVec.getOperand(0), InVal, EltNo);
15758       AddToWorklist(NewOp.getNode());
15759       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15760                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15761     }
15762   }
15763
15764   // If we can't generate a legal BUILD_VECTOR, exit
15765   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15766     return SDValue();
15767
15768   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15769   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
15770   // vector elements.
15771   SmallVector<SDValue, 8> Ops;
15772   // Do not combine these two vectors if the output vector will not replace
15773   // the input vector.
15774   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15775     Ops.append(InVec.getNode()->op_begin(),
15776                InVec.getNode()->op_end());
15777   } else if (InVec.isUndef()) {
15778     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
15779   } else {
15780     return SDValue();
15781   }
15782   assert(Ops.size() == NumElts && "Unexpected vector size");
15783
15784   // Insert the element
15785   if (Elt < Ops.size()) {
15786     // All the operands of BUILD_VECTOR must have the same type;
15787     // we enforce that here.
15788     EVT OpVT = Ops[0].getValueType();
15789     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15790   }
15791
15792   // Return the new vector
15793   return DAG.getBuildVector(VT, DL, Ops);
15794 }
15795
15796 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
15797                                                   SDValue EltNo,
15798                                                   LoadSDNode *OriginalLoad) {
15799   assert(!OriginalLoad->isVolatile());
15800
15801   EVT ResultVT = EVE->getValueType(0);
15802   EVT VecEltVT = InVecVT.getVectorElementType();
15803   unsigned Align = OriginalLoad->getAlignment();
15804   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15805       VecEltVT.getTypeForEVT(*DAG.getContext()));
15806
15807   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15808     return SDValue();
15809
15810   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15811     ISD::NON_EXTLOAD : ISD::EXTLOAD;
15812   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15813     return SDValue();
15814
15815   Align = NewAlign;
15816
15817   SDValue NewPtr = OriginalLoad->getBasePtr();
15818   SDValue Offset;
15819   EVT PtrType = NewPtr.getValueType();
15820   MachinePointerInfo MPI;
15821   SDLoc DL(EVE);
15822   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15823     int Elt = ConstEltNo->getZExtValue();
15824     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15825     Offset = DAG.getConstant(PtrOff, DL, PtrType);
15826     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15827   } else {
15828     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15829     Offset = DAG.getNode(
15830         ISD::MUL, DL, PtrType, Offset,
15831         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15832     // Discard the pointer info except the address space because the memory
15833     // operand can't represent this new access since the offset is variable.
15834     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
15835   }
15836   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15837
15838   // The replacement we need to do here is a little tricky: we need to
15839   // replace an extractelement of a load with a load.
15840   // Use ReplaceAllUsesOfValuesWith to do the replacement.
15841   // Note that this replacement assumes that the extractvalue is the only
15842   // use of the load; that's okay because we don't want to perform this
15843   // transformation in other cases anyway.
15844   SDValue Load;
15845   SDValue Chain;
15846   if (ResultVT.bitsGT(VecEltVT)) {
15847     // If the result type of vextract is wider than the load, then issue an
15848     // extending load instead.
15849     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15850                                                   VecEltVT)
15851                                    ? ISD::ZEXTLOAD
15852                                    : ISD::EXTLOAD;
15853     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15854                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15855                           Align, OriginalLoad->getMemOperand()->getFlags(),
15856                           OriginalLoad->getAAInfo());
15857     Chain = Load.getValue(1);
15858   } else {
15859     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15860                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15861                        OriginalLoad->getAAInfo());
15862     Chain = Load.getValue(1);
15863     if (ResultVT.bitsLT(VecEltVT))
15864       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15865     else
15866       Load = DAG.getBitcast(ResultVT, Load);
15867   }
15868   WorklistRemover DeadNodes(*this);
15869   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15870   SDValue To[] = { Load, Chain };
15871   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15872   // Since we're explicitly calling ReplaceAllUses, add the new node to the
15873   // worklist explicitly as well.
15874   AddToWorklist(Load.getNode());
15875   AddUsersToWorklist(Load.getNode()); // Add users too
15876   // Make sure to revisit this node to clean it up; it will usually be dead.
15877   AddToWorklist(EVE);
15878   ++OpsNarrowed;
15879   return SDValue(EVE, 0);
15880 }
15881
15882 /// Transform a vector binary operation into a scalar binary operation by moving
15883 /// the math/logic after an extract element of a vector.
15884 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
15885                                        bool LegalOperations) {
15886   SDValue Vec = ExtElt->getOperand(0);
15887   SDValue Index = ExtElt->getOperand(1);
15888   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15889   if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
15890     return SDValue();
15891
15892   // Targets may want to avoid this to prevent an expensive register transfer.
15893   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15894   if (!TLI.shouldScalarizeBinop(Vec))
15895     return SDValue();
15896
15897   // Extracting an element of a vector constant is constant-folded, so this
15898   // transform is just replacing a vector op with a scalar op while moving the
15899   // extract.
15900   SDValue Op0 = Vec.getOperand(0);
15901   SDValue Op1 = Vec.getOperand(1);
15902   if (isAnyConstantBuildVector(Op0, true) ||
15903       isAnyConstantBuildVector(Op1, true)) {
15904     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
15905     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
15906     SDLoc DL(ExtElt);
15907     EVT VT = ExtElt->getValueType(0);
15908     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
15909     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
15910     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
15911   }
15912
15913   return SDValue();
15914 }
15915
15916 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15917   SDValue VecOp = N->getOperand(0);
15918   SDValue Index = N->getOperand(1);
15919   EVT ScalarVT = N->getValueType(0);
15920   EVT VecVT = VecOp.getValueType();
15921   if (VecOp.isUndef())
15922     return DAG.getUNDEF(ScalarVT);
15923
15924   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15925   //
15926   // This only really matters if the index is non-constant since other combines
15927   // on the constant elements already work.
15928   SDLoc DL(N);
15929   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15930       Index == VecOp.getOperand(2)) {
15931     SDValue Elt = VecOp.getOperand(1);
15932     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
15933   }
15934
15935   // (vextract (scalar_to_vector val, 0) -> val
15936   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15937     // Check if the result type doesn't match the inserted element type. A
15938     // SCALAR_TO_VECTOR may truncate the inserted element and the
15939     // EXTRACT_VECTOR_ELT may widen the extracted vector.
15940     SDValue InOp = VecOp.getOperand(0);
15941     if (InOp.getValueType() != ScalarVT) {
15942       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15943       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15944     }
15945     return InOp;
15946   }
15947
15948   // extract_vector_elt of out-of-bounds element -> UNDEF
15949   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15950   unsigned NumElts = VecVT.getVectorNumElements();
15951   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
15952     return DAG.getUNDEF(ScalarVT);
15953
15954   // extract_vector_elt (build_vector x, y), 1 -> y
15955   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
15956       TLI.isTypeLegal(VecVT) &&
15957       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
15958     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
15959     EVT InEltVT = Elt.getValueType();
15960
15961     // Sometimes build_vector's scalar input types do not match result type.
15962     if (ScalarVT == InEltVT)
15963       return Elt;
15964
15965     // TODO: It may be useful to truncate if free if the build_vector implicitly
15966     // converts.
15967   }
15968
15969   // TODO: These transforms should not require the 'hasOneUse' restriction, but
15970   // there are regressions on multiple targets without it. We can end up with a
15971   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
15972   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
15973       VecOp.hasOneUse()) {
15974     // The vector index of the LSBs of the source depend on the endian-ness.
15975     bool IsLE = DAG.getDataLayout().isLittleEndian();
15976     unsigned ExtractIndex = IndexC->getZExtValue();
15977     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
15978     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
15979     SDValue BCSrc = VecOp.getOperand(0);
15980     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
15981       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
15982
15983     if (LegalTypes && BCSrc.getValueType().isInteger() &&
15984         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15985       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
15986       // trunc i64 X to i32
15987       SDValue X = BCSrc.getOperand(0);
15988       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
15989              "Extract element and scalar to vector can't change element type "
15990              "from FP to integer.");
15991       unsigned XBitWidth = X.getValueSizeInBits();
15992       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
15993       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
15994
15995       // An extract element return value type can be wider than its vector
15996       // operand element type. In that case, the high bits are undefined, so
15997       // it's possible that we may need to extend rather than truncate.
15998       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
15999         assert(XBitWidth % VecEltBitWidth == 0 &&
16000                "Scalar bitwidth must be a multiple of vector element bitwidth");
16001         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16002       }
16003     }
16004   }
16005
16006   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16007     return BO;
16008
16009   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16010   // We only perform this optimization before the op legalization phase because
16011   // we may introduce new vector instructions which are not backed by TD
16012   // patterns. For example on AVX, extracting elements from a wide vector
16013   // without using extract_subvector. However, if we can find an underlying
16014   // scalar value, then we can always use that.
16015   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16016     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16017     // Find the new index to extract from.
16018     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16019
16020     // Extracting an undef index is undef.
16021     if (OrigElt == -1)
16022       return DAG.getUNDEF(ScalarVT);
16023
16024     // Select the right vector half to extract from.
16025     SDValue SVInVec;
16026     if (OrigElt < (int)NumElts) {
16027       SVInVec = VecOp.getOperand(0);
16028     } else {
16029       SVInVec = VecOp.getOperand(1);
16030       OrigElt -= NumElts;
16031     }
16032
16033     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16034       SDValue InOp = SVInVec.getOperand(OrigElt);
16035       if (InOp.getValueType() != ScalarVT) {
16036         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16037         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16038       }
16039
16040       return InOp;
16041     }
16042
16043     // FIXME: We should handle recursing on other vector shuffles and
16044     // scalar_to_vector here as well.
16045
16046     if (!LegalOperations ||
16047         // FIXME: Should really be just isOperationLegalOrCustom.
16048         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16049         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16050       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16051       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16052                          DAG.getConstant(OrigElt, DL, IndexTy));
16053     }
16054   }
16055
16056   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16057   // simplify it based on the (valid) extraction indices.
16058   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16059         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16060                Use->getOperand(0) == VecOp &&
16061                isa<ConstantSDNode>(Use->getOperand(1));
16062       })) {
16063     APInt DemandedElts = APInt::getNullValue(NumElts);
16064     for (SDNode *Use : VecOp->uses()) {
16065       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16066       if (CstElt->getAPIntValue().ult(NumElts))
16067         DemandedElts.setBit(CstElt->getZExtValue());
16068     }
16069     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16070       // We simplified the vector operand of this extract element. If this
16071       // extract is not dead, visit it again so it is folded properly.
16072       if (N->getOpcode() != ISD::DELETED_NODE)
16073         AddToWorklist(N);
16074       return SDValue(N, 0);
16075     }
16076   }
16077
16078   // Everything under here is trying to match an extract of a loaded value.
16079   // If the result of load has to be truncated, then it's not necessarily
16080   // profitable.
16081   bool BCNumEltsChanged = false;
16082   EVT ExtVT = VecVT.getVectorElementType();
16083   EVT LVT = ExtVT;
16084   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16085     return SDValue();
16086
16087   if (VecOp.getOpcode() == ISD::BITCAST) {
16088     // Don't duplicate a load with other uses.
16089     if (!VecOp.hasOneUse())
16090       return SDValue();
16091
16092     EVT BCVT = VecOp.getOperand(0).getValueType();
16093     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16094       return SDValue();
16095     if (NumElts != BCVT.getVectorNumElements())
16096       BCNumEltsChanged = true;
16097     VecOp = VecOp.getOperand(0);
16098     ExtVT = BCVT.getVectorElementType();
16099   }
16100
16101   // extract (vector load $addr), i --> load $addr + i * size
16102   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16103       ISD::isNormalLoad(VecOp.getNode()) &&
16104       !Index->hasPredecessor(VecOp.getNode())) {
16105     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16106     if (VecLoad && !VecLoad->isVolatile())
16107       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16108   }
16109
16110   // Perform only after legalization to ensure build_vector / vector_shuffle
16111   // optimizations have already been done.
16112   if (!LegalOperations || !IndexC)
16113     return SDValue();
16114
16115   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16116   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16117   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16118   int Elt = IndexC->getZExtValue();
16119   LoadSDNode *LN0 = nullptr;
16120   if (ISD::isNormalLoad(VecOp.getNode())) {
16121     LN0 = cast<LoadSDNode>(VecOp);
16122   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16123              VecOp.getOperand(0).getValueType() == ExtVT &&
16124              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16125     // Don't duplicate a load with other uses.
16126     if (!VecOp.hasOneUse())
16127       return SDValue();
16128
16129     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16130   }
16131   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16132     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16133     // =>
16134     // (load $addr+1*size)
16135
16136     // Don't duplicate a load with other uses.
16137     if (!VecOp.hasOneUse())
16138       return SDValue();
16139
16140     // If the bit convert changed the number of elements, it is unsafe
16141     // to examine the mask.
16142     if (BCNumEltsChanged)
16143       return SDValue();
16144
16145     // Select the input vector, guarding against out of range extract vector.
16146     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16147     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16148
16149     if (VecOp.getOpcode() == ISD::BITCAST) {
16150       // Don't duplicate a load with other uses.
16151       if (!VecOp.hasOneUse())
16152         return SDValue();
16153
16154       VecOp = VecOp.getOperand(0);
16155     }
16156     if (ISD::isNormalLoad(VecOp.getNode())) {
16157       LN0 = cast<LoadSDNode>(VecOp);
16158       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16159       Index = DAG.getConstant(Elt, DL, Index.getValueType());
16160     }
16161   }
16162
16163   // Make sure we found a non-volatile load and the extractelement is
16164   // the only use.
16165   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
16166     return SDValue();
16167
16168   // If Idx was -1 above, Elt is going to be -1, so just return undef.
16169   if (Elt == -1)
16170     return DAG.getUNDEF(LVT);
16171
16172   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16173 }
16174
16175 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
16176 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16177   // We perform this optimization post type-legalization because
16178   // the type-legalizer often scalarizes integer-promoted vectors.
16179   // Performing this optimization before may create bit-casts which
16180   // will be type-legalized to complex code sequences.
16181   // We perform this optimization only before the operation legalizer because we
16182   // may introduce illegal operations.
16183   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16184     return SDValue();
16185
16186   unsigned NumInScalars = N->getNumOperands();
16187   SDLoc DL(N);
16188   EVT VT = N->getValueType(0);
16189
16190   // Check to see if this is a BUILD_VECTOR of a bunch of values
16191   // which come from any_extend or zero_extend nodes. If so, we can create
16192   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16193   // optimizations. We do not handle sign-extend because we can't fill the sign
16194   // using shuffles.
16195   EVT SourceType = MVT::Other;
16196   bool AllAnyExt = true;
16197
16198   for (unsigned i = 0; i != NumInScalars; ++i) {
16199     SDValue In = N->getOperand(i);
16200     // Ignore undef inputs.
16201     if (In.isUndef()) continue;
16202
16203     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
16204     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16205
16206     // Abort if the element is not an extension.
16207     if (!ZeroExt && !AnyExt) {
16208       SourceType = MVT::Other;
16209       break;
16210     }
16211
16212     // The input is a ZeroExt or AnyExt. Check the original type.
16213     EVT InTy = In.getOperand(0).getValueType();
16214
16215     // Check that all of the widened source types are the same.
16216     if (SourceType == MVT::Other)
16217       // First time.
16218       SourceType = InTy;
16219     else if (InTy != SourceType) {
16220       // Multiple income types. Abort.
16221       SourceType = MVT::Other;
16222       break;
16223     }
16224
16225     // Check if all of the extends are ANY_EXTENDs.
16226     AllAnyExt &= AnyExt;
16227   }
16228
16229   // In order to have valid types, all of the inputs must be extended from the
16230   // same source type and all of the inputs must be any or zero extend.
16231   // Scalar sizes must be a power of two.
16232   EVT OutScalarTy = VT.getScalarType();
16233   bool ValidTypes = SourceType != MVT::Other &&
16234                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16235                  isPowerOf2_32(SourceType.getSizeInBits());
16236
16237   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16238   // turn into a single shuffle instruction.
16239   if (!ValidTypes)
16240     return SDValue();
16241
16242   bool isLE = DAG.getDataLayout().isLittleEndian();
16243   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16244   assert(ElemRatio > 1 && "Invalid element size ratio");
16245   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16246                                DAG.getConstant(0, DL, SourceType);
16247
16248   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16249   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16250
16251   // Populate the new build_vector
16252   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16253     SDValue Cast = N->getOperand(i);
16254     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16255             Cast.getOpcode() == ISD::ZERO_EXTEND ||
16256             Cast.isUndef()) && "Invalid cast opcode");
16257     SDValue In;
16258     if (Cast.isUndef())
16259       In = DAG.getUNDEF(SourceType);
16260     else
16261       In = Cast->getOperand(0);
16262     unsigned Index = isLE ? (i * ElemRatio) :
16263                             (i * ElemRatio + (ElemRatio - 1));
16264
16265     assert(Index < Ops.size() && "Invalid index");
16266     Ops[Index] = In;
16267   }
16268
16269   // The type of the new BUILD_VECTOR node.
16270   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16271   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16272          "Invalid vector size");
16273   // Check if the new vector type is legal.
16274   if (!isTypeLegal(VecVT) ||
16275       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16276        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16277     return SDValue();
16278
16279   // Make the new BUILD_VECTOR.
16280   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16281
16282   // The new BUILD_VECTOR node has the potential to be further optimized.
16283   AddToWorklist(BV.getNode());
16284   // Bitcast to the desired type.
16285   return DAG.getBitcast(VT, BV);
16286 }
16287
16288 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16289                                            ArrayRef<int> VectorMask,
16290                                            SDValue VecIn1, SDValue VecIn2,
16291                                            unsigned LeftIdx) {
16292   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16293   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16294
16295   EVT VT = N->getValueType(0);
16296   EVT InVT1 = VecIn1.getValueType();
16297   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16298
16299   unsigned Vec2Offset = 0;
16300   unsigned NumElems = VT.getVectorNumElements();
16301   unsigned ShuffleNumElems = NumElems;
16302
16303   // In case both the input vectors are extracted from same base
16304   // vector we do not need extra addend (Vec2Offset) while
16305   // computing shuffle mask.
16306   if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16307       !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
16308       !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
16309     Vec2Offset = InVT1.getVectorNumElements();
16310
16311   // We can't generate a shuffle node with mismatched input and output types.
16312   // Try to make the types match the type of the output.
16313   if (InVT1 != VT || InVT2 != VT) {
16314     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
16315       // If the output vector length is a multiple of both input lengths,
16316       // we can concatenate them and pad the rest with undefs.
16317       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16318       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16319       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16320       ConcatOps[0] = VecIn1;
16321       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16322       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16323       VecIn2 = SDValue();
16324     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16325       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16326         return SDValue();
16327
16328       if (!VecIn2.getNode()) {
16329         // If we only have one input vector, and it's twice the size of the
16330         // output, split it in two.
16331         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16332                              DAG.getConstant(NumElems, DL, IdxTy));
16333         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16334         // Since we now have shorter input vectors, adjust the offset of the
16335         // second vector's start.
16336         Vec2Offset = NumElems;
16337       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16338         // VecIn1 is wider than the output, and we have another, possibly
16339         // smaller input. Pad the smaller input with undefs, shuffle at the
16340         // input vector width, and extract the output.
16341         // The shuffle type is different than VT, so check legality again.
16342         if (LegalOperations &&
16343             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16344           return SDValue();
16345
16346         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16347         // lower it back into a BUILD_VECTOR. So if the inserted type is
16348         // illegal, don't even try.
16349         if (InVT1 != InVT2) {
16350           if (!TLI.isTypeLegal(InVT2))
16351             return SDValue();
16352           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16353                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16354         }
16355         ShuffleNumElems = NumElems * 2;
16356       } else {
16357         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16358         // than VecIn1. We can't handle this for now - this case will disappear
16359         // when we start sorting the vectors by type.
16360         return SDValue();
16361       }
16362     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16363                InVT1.getSizeInBits() == VT.getSizeInBits()) {
16364       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16365       ConcatOps[0] = VecIn2;
16366       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16367     } else {
16368       // TODO: Support cases where the length mismatch isn't exactly by a
16369       // factor of 2.
16370       // TODO: Move this check upwards, so that if we have bad type
16371       // mismatches, we don't create any DAG nodes.
16372       return SDValue();
16373     }
16374   }
16375
16376   // Initialize mask to undef.
16377   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16378
16379   // Only need to run up to the number of elements actually used, not the
16380   // total number of elements in the shuffle - if we are shuffling a wider
16381   // vector, the high lanes should be set to undef.
16382   for (unsigned i = 0; i != NumElems; ++i) {
16383     if (VectorMask[i] <= 0)
16384       continue;
16385
16386     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16387     if (VectorMask[i] == (int)LeftIdx) {
16388       Mask[i] = ExtIndex;
16389     } else if (VectorMask[i] == (int)LeftIdx + 1) {
16390       Mask[i] = Vec2Offset + ExtIndex;
16391     }
16392   }
16393
16394   // The type the input vectors may have changed above.
16395   InVT1 = VecIn1.getValueType();
16396
16397   // If we already have a VecIn2, it should have the same type as VecIn1.
16398   // If we don't, get an undef/zero vector of the appropriate type.
16399   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16400   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16401
16402   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16403   if (ShuffleNumElems > NumElems)
16404     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16405
16406   return Shuffle;
16407 }
16408
16409 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16410   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16411
16412   // First, determine where the build vector is not undef.
16413   // TODO: We could extend this to handle zero elements as well as undefs.
16414   int NumBVOps = BV->getNumOperands();
16415   int ZextElt = -1;
16416   for (int i = 0; i != NumBVOps; ++i) {
16417     SDValue Op = BV->getOperand(i);
16418     if (Op.isUndef())
16419       continue;
16420     if (ZextElt == -1)
16421       ZextElt = i;
16422     else
16423       return SDValue();
16424   }
16425   // Bail out if there's no non-undef element.
16426   if (ZextElt == -1)
16427     return SDValue();
16428
16429   // The build vector contains some number of undef elements and exactly
16430   // one other element. That other element must be a zero-extended scalar
16431   // extracted from a vector at a constant index to turn this into a shuffle.
16432   // Also, require that the build vector does not implicitly truncate/extend
16433   // its elements.
16434   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
16435   EVT VT = BV->getValueType(0);
16436   SDValue Zext = BV->getOperand(ZextElt);
16437   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16438       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16439       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16440       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16441     return SDValue();
16442
16443   // The zero-extend must be a multiple of the source size, and we must be
16444   // building a vector of the same size as the source of the extract element.
16445   SDValue Extract = Zext.getOperand(0);
16446   unsigned DestSize = Zext.getValueSizeInBits();
16447   unsigned SrcSize = Extract.getValueSizeInBits();
16448   if (DestSize % SrcSize != 0 ||
16449       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16450     return SDValue();
16451
16452   // Create a shuffle mask that will combine the extracted element with zeros
16453   // and undefs.
16454   int ZextRatio = DestSize / SrcSize;
16455   int NumMaskElts = NumBVOps * ZextRatio;
16456   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16457   for (int i = 0; i != NumMaskElts; ++i) {
16458     if (i / ZextRatio == ZextElt) {
16459       // The low bits of the (potentially translated) extracted element map to
16460       // the source vector. The high bits map to zero. We will use a zero vector
16461       // as the 2nd source operand of the shuffle, so use the 1st element of
16462       // that vector (mask value is number-of-elements) for the high bits.
16463       if (i % ZextRatio == 0)
16464         ShufMask[i] = Extract.getConstantOperandVal(1);
16465       else
16466         ShufMask[i] = NumMaskElts;
16467     }
16468
16469     // Undef elements of the build vector remain undef because we initialize
16470     // the shuffle mask with -1.
16471   }
16472
16473   // Turn this into a shuffle with zero if that's legal.
16474   EVT VecVT = Extract.getOperand(0).getValueType();
16475   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16476     return SDValue();
16477
16478   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16479   // bitcast (shuffle V, ZeroVec, VectorMask)
16480   SDLoc DL(BV);
16481   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16482   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16483                                       ShufMask);
16484   return DAG.getBitcast(VT, Shuf);
16485 }
16486
16487 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16488 // operations. If the types of the vectors we're extracting from allow it,
16489 // turn this into a vector_shuffle node.
16490 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16491   SDLoc DL(N);
16492   EVT VT = N->getValueType(0);
16493
16494   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16495   if (!isTypeLegal(VT))
16496     return SDValue();
16497
16498   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16499     return V;
16500
16501   // May only combine to shuffle after legalize if shuffle is legal.
16502   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16503     return SDValue();
16504
16505   bool UsesZeroVector = false;
16506   unsigned NumElems = N->getNumOperands();
16507
16508   // Record, for each element of the newly built vector, which input vector
16509   // that element comes from. -1 stands for undef, 0 for the zero vector,
16510   // and positive values for the input vectors.
16511   // VectorMask maps each element to its vector number, and VecIn maps vector
16512   // numbers to their initial SDValues.
16513
16514   SmallVector<int, 8> VectorMask(NumElems, -1);
16515   SmallVector<SDValue, 8> VecIn;
16516   VecIn.push_back(SDValue());
16517
16518   for (unsigned i = 0; i != NumElems; ++i) {
16519     SDValue Op = N->getOperand(i);
16520
16521     if (Op.isUndef())
16522       continue;
16523
16524     // See if we can use a blend with a zero vector.
16525     // TODO: Should we generalize this to a blend with an arbitrary constant
16526     // vector?
16527     if (isNullConstant(Op) || isNullFPConstant(Op)) {
16528       UsesZeroVector = true;
16529       VectorMask[i] = 0;
16530       continue;
16531     }
16532
16533     // Not an undef or zero. If the input is something other than an
16534     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16535     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16536         !isa<ConstantSDNode>(Op.getOperand(1)))
16537       return SDValue();
16538     SDValue ExtractedFromVec = Op.getOperand(0);
16539
16540     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
16541     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16542       return SDValue();
16543
16544     // All inputs must have the same element type as the output.
16545     if (VT.getVectorElementType() !=
16546         ExtractedFromVec.getValueType().getVectorElementType())
16547       return SDValue();
16548
16549     // Have we seen this input vector before?
16550     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16551     // a map back from SDValues to numbers isn't worth it.
16552     unsigned Idx = std::distance(
16553         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16554     if (Idx == VecIn.size())
16555       VecIn.push_back(ExtractedFromVec);
16556
16557     VectorMask[i] = Idx;
16558   }
16559
16560   // If we didn't find at least one input vector, bail out.
16561   if (VecIn.size() < 2)
16562     return SDValue();
16563
16564   // If all the Operands of BUILD_VECTOR extract from same
16565   // vector, then split the vector efficiently based on the maximum
16566   // vector access index and adjust the VectorMask and
16567   // VecIn accordingly.
16568   if (VecIn.size() == 2) {
16569     unsigned MaxIndex = 0;
16570     unsigned NearestPow2 = 0;
16571     SDValue Vec = VecIn.back();
16572     EVT InVT = Vec.getValueType();
16573     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16574     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16575
16576     for (unsigned i = 0; i < NumElems; i++) {
16577       if (VectorMask[i] <= 0)
16578         continue;
16579       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16580       IndexVec[i] = Index;
16581       MaxIndex = std::max(MaxIndex, Index);
16582     }
16583
16584     NearestPow2 = PowerOf2Ceil(MaxIndex);
16585     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16586         NumElems * 2 < NearestPow2) {
16587       unsigned SplitSize = NearestPow2 / 2;
16588       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16589                                      InVT.getVectorElementType(), SplitSize);
16590       if (TLI.isTypeLegal(SplitVT)) {
16591         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16592                                      DAG.getConstant(SplitSize, DL, IdxTy));
16593         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16594                                      DAG.getConstant(0, DL, IdxTy));
16595         VecIn.pop_back();
16596         VecIn.push_back(VecIn1);
16597         VecIn.push_back(VecIn2);
16598
16599         for (unsigned i = 0; i < NumElems; i++) {
16600           if (VectorMask[i] <= 0)
16601             continue;
16602           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16603         }
16604       }
16605     }
16606   }
16607
16608   // TODO: We want to sort the vectors by descending length, so that adjacent
16609   // pairs have similar length, and the longer vector is always first in the
16610   // pair.
16611
16612   // TODO: Should this fire if some of the input vectors has illegal type (like
16613   // it does now), or should we let legalization run its course first?
16614
16615   // Shuffle phase:
16616   // Take pairs of vectors, and shuffle them so that the result has elements
16617   // from these vectors in the correct places.
16618   // For example, given:
16619   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16620   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16621   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16622   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16623   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16624   // We will generate:
16625   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16626   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16627   SmallVector<SDValue, 4> Shuffles;
16628   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16629     unsigned LeftIdx = 2 * In + 1;
16630     SDValue VecLeft = VecIn[LeftIdx];
16631     SDValue VecRight =
16632         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16633
16634     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16635                                                 VecRight, LeftIdx))
16636       Shuffles.push_back(Shuffle);
16637     else
16638       return SDValue();
16639   }
16640
16641   // If we need the zero vector as an "ingredient" in the blend tree, add it
16642   // to the list of shuffles.
16643   if (UsesZeroVector)
16644     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16645                                       : DAG.getConstantFP(0.0, DL, VT));
16646
16647   // If we only have one shuffle, we're done.
16648   if (Shuffles.size() == 1)
16649     return Shuffles[0];
16650
16651   // Update the vector mask to point to the post-shuffle vectors.
16652   for (int &Vec : VectorMask)
16653     if (Vec == 0)
16654       Vec = Shuffles.size() - 1;
16655     else
16656       Vec = (Vec - 1) / 2;
16657
16658   // More than one shuffle. Generate a binary tree of blends, e.g. if from
16659   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16660   // generate:
16661   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16662   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16663   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16664   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16665   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16666   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16667   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16668
16669   // Make sure the initial size of the shuffle list is even.
16670   if (Shuffles.size() % 2)
16671     Shuffles.push_back(DAG.getUNDEF(VT));
16672
16673   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16674     if (CurSize % 2) {
16675       Shuffles[CurSize] = DAG.getUNDEF(VT);
16676       CurSize++;
16677     }
16678     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16679       int Left = 2 * In;
16680       int Right = 2 * In + 1;
16681       SmallVector<int, 8> Mask(NumElems, -1);
16682       for (unsigned i = 0; i != NumElems; ++i) {
16683         if (VectorMask[i] == Left) {
16684           Mask[i] = i;
16685           VectorMask[i] = In;
16686         } else if (VectorMask[i] == Right) {
16687           Mask[i] = i + NumElems;
16688           VectorMask[i] = In;
16689         }
16690       }
16691
16692       Shuffles[In] =
16693           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16694     }
16695   }
16696   return Shuffles[0];
16697 }
16698
16699 // Try to turn a build vector of zero extends of extract vector elts into a
16700 // a vector zero extend and possibly an extract subvector.
16701 // TODO: Support sign extend or any extend?
16702 // TODO: Allow undef elements?
16703 // TODO: Don't require the extracts to start at element 0.
16704 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16705   if (LegalOperations)
16706     return SDValue();
16707
16708   EVT VT = N->getValueType(0);
16709
16710   SDValue Op0 = N->getOperand(0);
16711   auto checkElem = [&](SDValue Op) -> int64_t {
16712     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16713         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16714         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16715       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16716         return C->getZExtValue();
16717     return -1;
16718   };
16719
16720   // Make sure the first element matches
16721   // (zext (extract_vector_elt X, C))
16722   int64_t Offset = checkElem(Op0);
16723   if (Offset < 0)
16724     return SDValue();
16725
16726   unsigned NumElems = N->getNumOperands();
16727   SDValue In = Op0.getOperand(0).getOperand(0);
16728   EVT InSVT = In.getValueType().getScalarType();
16729   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16730
16731   // Don't create an illegal input type after type legalization.
16732   if (LegalTypes && !TLI.isTypeLegal(InVT))
16733     return SDValue();
16734
16735   // Ensure all the elements come from the same vector and are adjacent.
16736   for (unsigned i = 1; i != NumElems; ++i) {
16737     if ((Offset + i) != checkElem(N->getOperand(i)))
16738       return SDValue();
16739   }
16740
16741   SDLoc DL(N);
16742   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16743                    Op0.getOperand(0).getOperand(1));
16744   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16745 }
16746
16747 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
16748   EVT VT = N->getValueType(0);
16749
16750   // A vector built entirely of undefs is undef.
16751   if (ISD::allOperandsUndef(N))
16752     return DAG.getUNDEF(VT);
16753
16754   // If this is a splat of a bitcast from another vector, change to a
16755   // concat_vector.
16756   // For example:
16757   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
16758   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
16759   //
16760   // If X is a build_vector itself, the concat can become a larger build_vector.
16761   // TODO: Maybe this is useful for non-splat too?
16762   if (!LegalOperations) {
16763     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
16764       Splat = peekThroughBitcasts(Splat);
16765       EVT SrcVT = Splat.getValueType();
16766       if (SrcVT.isVector()) {
16767         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
16768         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
16769                                      SrcVT.getVectorElementType(), NumElts);
16770         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
16771           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
16772           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
16773                                        NewVT, Ops);
16774           return DAG.getBitcast(VT, Concat);
16775         }
16776       }
16777     }
16778   }
16779
16780   // Check if we can express BUILD VECTOR via subvector extract.
16781   if (!LegalTypes && (N->getNumOperands() > 1)) {
16782     SDValue Op0 = N->getOperand(0);
16783     auto checkElem = [&](SDValue Op) -> uint64_t {
16784       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
16785           (Op0.getOperand(0) == Op.getOperand(0)))
16786         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
16787           return CNode->getZExtValue();
16788       return -1;
16789     };
16790
16791     int Offset = checkElem(Op0);
16792     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
16793       if (Offset + i != checkElem(N->getOperand(i))) {
16794         Offset = -1;
16795         break;
16796       }
16797     }
16798
16799     if ((Offset == 0) &&
16800         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
16801       return Op0.getOperand(0);
16802     if ((Offset != -1) &&
16803         ((Offset % N->getValueType(0).getVectorNumElements()) ==
16804          0)) // IDX must be multiple of output size.
16805       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
16806                          Op0.getOperand(0), Op0.getOperand(1));
16807   }
16808
16809   if (SDValue V = convertBuildVecZextToZext(N))
16810     return V;
16811
16812   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16813     return V;
16814
16815   if (SDValue V = reduceBuildVecToShuffle(N))
16816     return V;
16817
16818   return SDValue();
16819 }
16820
16821 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16822   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16823   EVT OpVT = N->getOperand(0).getValueType();
16824
16825   // If the operands are legal vectors, leave them alone.
16826   if (TLI.isTypeLegal(OpVT))
16827     return SDValue();
16828
16829   SDLoc DL(N);
16830   EVT VT = N->getValueType(0);
16831   SmallVector<SDValue, 8> Ops;
16832
16833   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16834   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16835
16836   // Keep track of what we encounter.
16837   bool AnyInteger = false;
16838   bool AnyFP = false;
16839   for (const SDValue &Op : N->ops()) {
16840     if (ISD::BITCAST == Op.getOpcode() &&
16841         !Op.getOperand(0).getValueType().isVector())
16842       Ops.push_back(Op.getOperand(0));
16843     else if (ISD::UNDEF == Op.getOpcode())
16844       Ops.push_back(ScalarUndef);
16845     else
16846       return SDValue();
16847
16848     // Note whether we encounter an integer or floating point scalar.
16849     // If it's neither, bail out, it could be something weird like x86mmx.
16850     EVT LastOpVT = Ops.back().getValueType();
16851     if (LastOpVT.isFloatingPoint())
16852       AnyFP = true;
16853     else if (LastOpVT.isInteger())
16854       AnyInteger = true;
16855     else
16856       return SDValue();
16857   }
16858
16859   // If any of the operands is a floating point scalar bitcast to a vector,
16860   // use floating point types throughout, and bitcast everything.
16861   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16862   if (AnyFP) {
16863     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16864     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16865     if (AnyInteger) {
16866       for (SDValue &Op : Ops) {
16867         if (Op.getValueType() == SVT)
16868           continue;
16869         if (Op.isUndef())
16870           Op = ScalarUndef;
16871         else
16872           Op = DAG.getBitcast(SVT, Op);
16873       }
16874     }
16875   }
16876
16877   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16878                                VT.getSizeInBits() / SVT.getSizeInBits());
16879   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16880 }
16881
16882 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16883 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16884 // most two distinct vectors the same size as the result, attempt to turn this
16885 // into a legal shuffle.
16886 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16887   EVT VT = N->getValueType(0);
16888   EVT OpVT = N->getOperand(0).getValueType();
16889   int NumElts = VT.getVectorNumElements();
16890   int NumOpElts = OpVT.getVectorNumElements();
16891
16892   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16893   SmallVector<int, 8> Mask;
16894
16895   for (SDValue Op : N->ops()) {
16896     Op = peekThroughBitcasts(Op);
16897
16898     // UNDEF nodes convert to UNDEF shuffle mask values.
16899     if (Op.isUndef()) {
16900       Mask.append((unsigned)NumOpElts, -1);
16901       continue;
16902     }
16903
16904     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16905       return SDValue();
16906
16907     // What vector are we extracting the subvector from and at what index?
16908     SDValue ExtVec = Op.getOperand(0);
16909
16910     // We want the EVT of the original extraction to correctly scale the
16911     // extraction index.
16912     EVT ExtVT = ExtVec.getValueType();
16913     ExtVec = peekThroughBitcasts(ExtVec);
16914
16915     // UNDEF nodes convert to UNDEF shuffle mask values.
16916     if (ExtVec.isUndef()) {
16917       Mask.append((unsigned)NumOpElts, -1);
16918       continue;
16919     }
16920
16921     if (!isa<ConstantSDNode>(Op.getOperand(1)))
16922       return SDValue();
16923     int ExtIdx = Op.getConstantOperandVal(1);
16924
16925     // Ensure that we are extracting a subvector from a vector the same
16926     // size as the result.
16927     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16928       return SDValue();
16929
16930     // Scale the subvector index to account for any bitcast.
16931     int NumExtElts = ExtVT.getVectorNumElements();
16932     if (0 == (NumExtElts % NumElts))
16933       ExtIdx /= (NumExtElts / NumElts);
16934     else if (0 == (NumElts % NumExtElts))
16935       ExtIdx *= (NumElts / NumExtElts);
16936     else
16937       return SDValue();
16938
16939     // At most we can reference 2 inputs in the final shuffle.
16940     if (SV0.isUndef() || SV0 == ExtVec) {
16941       SV0 = ExtVec;
16942       for (int i = 0; i != NumOpElts; ++i)
16943         Mask.push_back(i + ExtIdx);
16944     } else if (SV1.isUndef() || SV1 == ExtVec) {
16945       SV1 = ExtVec;
16946       for (int i = 0; i != NumOpElts; ++i)
16947         Mask.push_back(i + ExtIdx + NumElts);
16948     } else {
16949       return SDValue();
16950     }
16951   }
16952
16953   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16954     return SDValue();
16955
16956   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16957                               DAG.getBitcast(VT, SV1), Mask);
16958 }
16959
16960 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16961   // If we only have one input vector, we don't need to do any concatenation.
16962   if (N->getNumOperands() == 1)
16963     return N->getOperand(0);
16964
16965   // Check if all of the operands are undefs.
16966   EVT VT = N->getValueType(0);
16967   if (ISD::allOperandsUndef(N))
16968     return DAG.getUNDEF(VT);
16969
16970   // Optimize concat_vectors where all but the first of the vectors are undef.
16971   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16972         return Op.isUndef();
16973       })) {
16974     SDValue In = N->getOperand(0);
16975     assert(In.getValueType().isVector() && "Must concat vectors");
16976
16977     SDValue Scalar = peekThroughOneUseBitcasts(In);
16978
16979     // concat_vectors(scalar_to_vector(scalar), undef) ->
16980     //     scalar_to_vector(scalar)
16981     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16982          Scalar.hasOneUse()) {
16983       EVT SVT = Scalar.getValueType().getVectorElementType();
16984       if (SVT == Scalar.getOperand(0).getValueType())
16985         Scalar = Scalar.getOperand(0);
16986     }
16987
16988     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
16989     if (!Scalar.getValueType().isVector()) {
16990       // If the bitcast type isn't legal, it might be a trunc of a legal type;
16991       // look through the trunc so we can still do the transform:
16992       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16993       if (Scalar->getOpcode() == ISD::TRUNCATE &&
16994           !TLI.isTypeLegal(Scalar.getValueType()) &&
16995           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16996         Scalar = Scalar->getOperand(0);
16997
16998       EVT SclTy = Scalar.getValueType();
16999
17000       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17001         return SDValue();
17002
17003       // Bail out if the vector size is not a multiple of the scalar size.
17004       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17005         return SDValue();
17006
17007       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17008       if (VNTNumElms < 2)
17009         return SDValue();
17010
17011       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17012       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17013         return SDValue();
17014
17015       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17016       return DAG.getBitcast(VT, Res);
17017     }
17018   }
17019
17020   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17021   // We have already tested above for an UNDEF only concatenation.
17022   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17023   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17024   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17025     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17026   };
17027   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17028     SmallVector<SDValue, 8> Opnds;
17029     EVT SVT = VT.getScalarType();
17030
17031     EVT MinVT = SVT;
17032     if (!SVT.isFloatingPoint()) {
17033       // If BUILD_VECTOR are from built from integer, they may have different
17034       // operand types. Get the smallest type and truncate all operands to it.
17035       bool FoundMinVT = false;
17036       for (const SDValue &Op : N->ops())
17037         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17038           EVT OpSVT = Op.getOperand(0).getValueType();
17039           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17040           FoundMinVT = true;
17041         }
17042       assert(FoundMinVT && "Concat vector type mismatch");
17043     }
17044
17045     for (const SDValue &Op : N->ops()) {
17046       EVT OpVT = Op.getValueType();
17047       unsigned NumElts = OpVT.getVectorNumElements();
17048
17049       if (ISD::UNDEF == Op.getOpcode())
17050         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17051
17052       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17053         if (SVT.isFloatingPoint()) {
17054           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17055           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17056         } else {
17057           for (unsigned i = 0; i != NumElts; ++i)
17058             Opnds.push_back(
17059                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17060         }
17061       }
17062     }
17063
17064     assert(VT.getVectorNumElements() == Opnds.size() &&
17065            "Concat vector type mismatch");
17066     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17067   }
17068
17069   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17070   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17071     return V;
17072
17073   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17074   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17075     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17076       return V;
17077
17078   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17079   // nodes often generate nop CONCAT_VECTOR nodes.
17080   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17081   // place the incoming vectors at the exact same location.
17082   SDValue SingleSource = SDValue();
17083   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17084
17085   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17086     SDValue Op = N->getOperand(i);
17087
17088     if (Op.isUndef())
17089       continue;
17090
17091     // Check if this is the identity extract:
17092     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17093       return SDValue();
17094
17095     // Find the single incoming vector for the extract_subvector.
17096     if (SingleSource.getNode()) {
17097       if (Op.getOperand(0) != SingleSource)
17098         return SDValue();
17099     } else {
17100       SingleSource = Op.getOperand(0);
17101
17102       // Check the source type is the same as the type of the result.
17103       // If not, this concat may extend the vector, so we can not
17104       // optimize it away.
17105       if (SingleSource.getValueType() != N->getValueType(0))
17106         return SDValue();
17107     }
17108
17109     unsigned IdentityIndex = i * PartNumElem;
17110     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17111     // The extract index must be constant.
17112     if (!CS)
17113       return SDValue();
17114
17115     // Check that we are reading from the identity index.
17116     if (CS->getZExtValue() != IdentityIndex)
17117       return SDValue();
17118   }
17119
17120   if (SingleSource.getNode())
17121     return SingleSource;
17122
17123   return SDValue();
17124 }
17125
17126 /// If we are extracting a subvector produced by a wide binary operator try
17127 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17128 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17129   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17130   // some of these bailouts with other transforms.
17131
17132   // The extract index must be a constant, so we can map it to a concat operand.
17133   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17134   if (!ExtractIndexC)
17135     return SDValue();
17136
17137   // We are looking for an optionally bitcasted wide vector binary operator
17138   // feeding an extract subvector.
17139   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17140   if (!ISD::isBinaryOp(BinOp.getNode()))
17141     return SDValue();
17142
17143   // The binop must be a vector type, so we can extract some fraction of it.
17144   EVT WideBVT = BinOp.getValueType();
17145   if (!WideBVT.isVector())
17146     return SDValue();
17147
17148   EVT VT = Extract->getValueType(0);
17149   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17150   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17151          "Extract index is not a multiple of the vector length.");
17152
17153   // Bail out if this is not a proper multiple width extraction.
17154   unsigned WideWidth = WideBVT.getSizeInBits();
17155   unsigned NarrowWidth = VT.getSizeInBits();
17156   if (WideWidth % NarrowWidth != 0)
17157     return SDValue();
17158
17159   // Bail out if we are extracting a fraction of a single operation. This can
17160   // occur because we potentially looked through a bitcast of the binop.
17161   unsigned NarrowingRatio = WideWidth / NarrowWidth;
17162   unsigned WideNumElts = WideBVT.getVectorNumElements();
17163   if (WideNumElts % NarrowingRatio != 0)
17164     return SDValue();
17165
17166   // Bail out if the target does not support a narrower version of the binop.
17167   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17168                                    WideNumElts / NarrowingRatio);
17169   unsigned BOpcode = BinOp.getOpcode();
17170   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17171   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17172     return SDValue();
17173
17174   // If extraction is cheap, we don't need to look at the binop operands
17175   // for concat ops. The narrow binop alone makes this transform profitable.
17176   // We can't just reuse the original extract index operand because we may have
17177   // bitcasted.
17178   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17179   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17180   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17181   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17182       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17183     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17184     SDLoc DL(Extract);
17185     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17186     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17187                             BinOp.getOperand(0), NewExtIndex);
17188     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17189                             BinOp.getOperand(1), NewExtIndex);
17190     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17191                                       BinOp.getNode()->getFlags());
17192     return DAG.getBitcast(VT, NarrowBinOp);
17193   }
17194
17195   // Only handle the case where we are doubling and then halving. A larger ratio
17196   // may require more than two narrow binops to replace the wide binop.
17197   if (NarrowingRatio != 2)
17198     return SDValue();
17199
17200   // TODO: The motivating case for this transform is an x86 AVX1 target. That
17201   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17202   // flavors, but no other 256-bit integer support. This could be extended to
17203   // handle any binop, but that may require fixing/adding other folds to avoid
17204   // codegen regressions.
17205   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17206     return SDValue();
17207
17208   // We need at least one concatenation operation of a binop operand to make
17209   // this transform worthwhile. The concat must double the input vector sizes.
17210   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
17211   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
17212   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
17213   bool ConcatL =
17214       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17215   bool ConcatR =
17216       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17217   if (!ConcatL && !ConcatR)
17218     return SDValue();
17219
17220   // If one of the binop operands was not the result of a concat, we must
17221   // extract a half-sized operand for our new narrow binop.
17222   SDLoc DL(Extract);
17223
17224   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17225   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
17226   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
17227   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17228                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17229                                     BinOp.getOperand(0),
17230                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17231
17232   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17233                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17234                                     BinOp.getOperand(1),
17235                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17236
17237   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17238   return DAG.getBitcast(VT, NarrowBinOp);
17239 }
17240
17241 /// If we are extracting a subvector from a wide vector load, convert to a
17242 /// narrow load to eliminate the extraction:
17243 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17244 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17245   // TODO: Add support for big-endian. The offset calculation must be adjusted.
17246   if (DAG.getDataLayout().isBigEndian())
17247     return SDValue();
17248
17249   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17250   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17251   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17252     return SDValue();
17253
17254   // Allow targets to opt-out.
17255   EVT VT = Extract->getValueType(0);
17256   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17257   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17258     return SDValue();
17259
17260   // The narrow load will be offset from the base address of the old load if
17261   // we are extracting from something besides index 0 (little-endian).
17262   SDLoc DL(Extract);
17263   SDValue BaseAddr = Ld->getOperand(1);
17264   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
17265
17266   // TODO: Use "BaseIndexOffset" to make this more effective.
17267   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17268   MachineFunction &MF = DAG.getMachineFunction();
17269   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17270                                                    VT.getStoreSize());
17271   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17272   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17273   return NewLd;
17274 }
17275
17276 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
17277   EVT NVT = N->getValueType(0);
17278   SDValue V = N->getOperand(0);
17279
17280   // Extract from UNDEF is UNDEF.
17281   if (V.isUndef())
17282     return DAG.getUNDEF(NVT);
17283
17284   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17285     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17286       return NarrowLoad;
17287
17288   // Combine an extract of an extract into a single extract_subvector.
17289   // ext (ext X, C), 0 --> ext X, C
17290   if (isNullConstant(N->getOperand(1)) &&
17291       V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse() &&
17292       isa<ConstantSDNode>(V.getOperand(1))) {
17293     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
17294                                     V.getConstantOperandVal(1)) &&
17295         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
17296       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
17297                          V.getOperand(1));
17298     }
17299   }
17300
17301   // Combine:
17302   //    (extract_subvec (concat V1, V2, ...), i)
17303   // Into:
17304   //    Vi if possible
17305   // Only operand 0 is checked as 'concat' assumes all inputs of the same
17306   // type.
17307   if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17308       isa<ConstantSDNode>(N->getOperand(1)) &&
17309       V.getOperand(0).getValueType() == NVT) {
17310     unsigned Idx = N->getConstantOperandVal(1);
17311     unsigned NumElems = NVT.getVectorNumElements();
17312     assert((Idx % NumElems) == 0 &&
17313            "IDX in concat is not a multiple of the result vector length.");
17314     return V->getOperand(Idx / NumElems);
17315   }
17316
17317   V = peekThroughBitcasts(V);
17318
17319   // If the input is a build vector. Try to make a smaller build vector.
17320   if (V.getOpcode() == ISD::BUILD_VECTOR) {
17321     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17322       EVT InVT = V.getValueType();
17323       unsigned ExtractSize = NVT.getSizeInBits();
17324       unsigned EltSize = InVT.getScalarSizeInBits();
17325       // Only do this if we won't split any elements.
17326       if (ExtractSize % EltSize == 0) {
17327         unsigned NumElems = ExtractSize / EltSize;
17328         EVT EltVT = InVT.getVectorElementType();
17329         EVT ExtractVT = NumElems == 1 ? EltVT :
17330           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
17331         if ((Level < AfterLegalizeDAG ||
17332              (NumElems == 1 ||
17333               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17334             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17335           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
17336                             EltSize;
17337           if (NumElems == 1) {
17338             SDValue Src = V->getOperand(IdxVal);
17339             if (EltVT != Src.getValueType())
17340               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
17341
17342             return DAG.getBitcast(NVT, Src);
17343           }
17344
17345           // Extract the pieces from the original build_vector.
17346           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
17347                                             makeArrayRef(V->op_begin() + IdxVal,
17348                                                          NumElems));
17349           return DAG.getBitcast(NVT, BuildVec);
17350         }
17351       }
17352     }
17353   }
17354
17355   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17356     // Handle only simple case where vector being inserted and vector
17357     // being extracted are of same size.
17358     EVT SmallVT = V.getOperand(1).getValueType();
17359     if (!NVT.bitsEq(SmallVT))
17360       return SDValue();
17361
17362     // Only handle cases where both indexes are constants.
17363     auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17364     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17365
17366     if (InsIdx && ExtIdx) {
17367       // Combine:
17368       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17369       // Into:
17370       //    indices are equal or bit offsets are equal => V1
17371       //    otherwise => (extract_subvec V1, ExtIdx)
17372       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17373           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17374         return DAG.getBitcast(NVT, V.getOperand(1));
17375       return DAG.getNode(
17376           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17377           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17378                          N->getOperand(1));
17379     }
17380   }
17381
17382   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17383     return NarrowBOp;
17384
17385   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17386     return SDValue(N, 0);
17387
17388   return SDValue();
17389 }
17390
17391 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
17392 // or turn a shuffle of a single concat into simpler shuffle then concat.
17393 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17394   EVT VT = N->getValueType(0);
17395   unsigned NumElts = VT.getVectorNumElements();
17396
17397   SDValue N0 = N->getOperand(0);
17398   SDValue N1 = N->getOperand(1);
17399   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17400
17401   SmallVector<SDValue, 4> Ops;
17402   EVT ConcatVT = N0.getOperand(0).getValueType();
17403   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17404   unsigned NumConcats = NumElts / NumElemsPerConcat;
17405
17406   // Special case: shuffle(concat(A,B)) can be more efficiently represented
17407   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17408   // half vector elements.
17409   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17410       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
17411                   SVN->getMask().end(), [](int i) { return i == -1; })) {
17412     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
17413                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
17414     N1 = DAG.getUNDEF(ConcatVT);
17415     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17416   }
17417
17418   // Look at every vector that's inserted. We're looking for exact
17419   // subvector-sized copies from a concatenated vector
17420   for (unsigned I = 0; I != NumConcats; ++I) {
17421     // Make sure we're dealing with a copy.
17422     unsigned Begin = I * NumElemsPerConcat;
17423     bool AllUndef = true, NoUndef = true;
17424     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
17425       if (SVN->getMaskElt(J) >= 0)
17426         AllUndef = false;
17427       else
17428         NoUndef = false;
17429     }
17430
17431     if (NoUndef) {
17432       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
17433         return SDValue();
17434
17435       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
17436         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
17437           return SDValue();
17438
17439       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
17440       if (FirstElt < N0.getNumOperands())
17441         Ops.push_back(N0.getOperand(FirstElt));
17442       else
17443         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
17444
17445     } else if (AllUndef) {
17446       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
17447     } else { // Mixed with general masks and undefs, can't do optimization.
17448       return SDValue();
17449     }
17450   }
17451
17452   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17453 }
17454
17455 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17456 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17457 //
17458 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17459 // a simplification in some sense, but it isn't appropriate in general: some
17460 // BUILD_VECTORs are substantially cheaper than others. The general case
17461 // of a BUILD_VECTOR requires inserting each element individually (or
17462 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17463 // all constants is a single constant pool load.  A BUILD_VECTOR where each
17464 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
17465 // are undef lowers to a small number of element insertions.
17466 //
17467 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17468 // We don't fold shuffles where one side is a non-zero constant, and we don't
17469 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17470 // non-constant operands. This seems to work out reasonably well in practice.
17471 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17472                                        SelectionDAG &DAG,
17473                                        const TargetLowering &TLI) {
17474   EVT VT = SVN->getValueType(0);
17475   unsigned NumElts = VT.getVectorNumElements();
17476   SDValue N0 = SVN->getOperand(0);
17477   SDValue N1 = SVN->getOperand(1);
17478
17479   if (!N0->hasOneUse())
17480     return SDValue();
17481
17482   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
17483   // discussed above.
17484   if (!N1.isUndef()) {
17485     if (!N1->hasOneUse())
17486       return SDValue();
17487
17488     bool N0AnyConst = isAnyConstantBuildVector(N0);
17489     bool N1AnyConst = isAnyConstantBuildVector(N1);
17490     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17491       return SDValue();
17492     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17493       return SDValue();
17494   }
17495
17496   // If both inputs are splats of the same value then we can safely merge this
17497   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17498   bool IsSplat = false;
17499   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17500   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17501   if (BV0 && BV1)
17502     if (SDValue Splat0 = BV0->getSplatValue())
17503       IsSplat = (Splat0 == BV1->getSplatValue());
17504
17505   SmallVector<SDValue, 8> Ops;
17506   SmallSet<SDValue, 16> DuplicateOps;
17507   for (int M : SVN->getMask()) {
17508     SDValue Op = DAG.getUNDEF(VT.getScalarType());
17509     if (M >= 0) {
17510       int Idx = M < (int)NumElts ? M : M - NumElts;
17511       SDValue &S = (M < (int)NumElts ? N0 : N1);
17512       if (S.getOpcode() == ISD::BUILD_VECTOR) {
17513         Op = S.getOperand(Idx);
17514       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17515         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
17516         Op = S.getOperand(0);
17517       } else {
17518         // Operand can't be combined - bail out.
17519         return SDValue();
17520       }
17521     }
17522
17523     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17524     // generating a splat; semantically, this is fine, but it's likely to
17525     // generate low-quality code if the target can't reconstruct an appropriate
17526     // shuffle.
17527     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17528       if (!IsSplat && !DuplicateOps.insert(Op).second)
17529         return SDValue();
17530
17531     Ops.push_back(Op);
17532   }
17533
17534   // BUILD_VECTOR requires all inputs to be of the same type, find the
17535   // maximum type and extend them all.
17536   EVT SVT = VT.getScalarType();
17537   if (SVT.isInteger())
17538     for (SDValue &Op : Ops)
17539       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17540   if (SVT != VT.getScalarType())
17541     for (SDValue &Op : Ops)
17542       Op = TLI.isZExtFree(Op.getValueType(), SVT)
17543                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17544                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17545   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17546 }
17547
17548 // Match shuffles that can be converted to any_vector_extend_in_reg.
17549 // This is often generated during legalization.
17550 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
17551 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17552 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17553                                             SelectionDAG &DAG,
17554                                             const TargetLowering &TLI,
17555                                             bool LegalOperations) {
17556   EVT VT = SVN->getValueType(0);
17557   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17558
17559   // TODO Add support for big-endian when we have a test case.
17560   if (!VT.isInteger() || IsBigEndian)
17561     return SDValue();
17562
17563   unsigned NumElts = VT.getVectorNumElements();
17564   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17565   ArrayRef<int> Mask = SVN->getMask();
17566   SDValue N0 = SVN->getOperand(0);
17567
17568   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17569   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17570     for (unsigned i = 0; i != NumElts; ++i) {
17571       if (Mask[i] < 0)
17572         continue;
17573       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17574         continue;
17575       return false;
17576     }
17577     return true;
17578   };
17579
17580   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17581   // power-of-2 extensions as they are the most likely.
17582   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17583     // Check for non power of 2 vector sizes
17584     if (NumElts % Scale != 0)
17585       continue;
17586     if (!isAnyExtend(Scale))
17587       continue;
17588
17589     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17590     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17591     // Never create an illegal type. Only create unsupported operations if we
17592     // are pre-legalization.
17593     if (TLI.isTypeLegal(OutVT))
17594       if (!LegalOperations ||
17595           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17596         return DAG.getBitcast(VT,
17597                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17598                                           SDLoc(SVN), OutVT, N0));
17599   }
17600
17601   return SDValue();
17602 }
17603
17604 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17605 // each source element of a large type into the lowest elements of a smaller
17606 // destination type. This is often generated during legalization.
17607 // If the source node itself was a '*_extend_vector_inreg' node then we should
17608 // then be able to remove it.
17609 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17610                                         SelectionDAG &DAG) {
17611   EVT VT = SVN->getValueType(0);
17612   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17613
17614   // TODO Add support for big-endian when we have a test case.
17615   if (!VT.isInteger() || IsBigEndian)
17616     return SDValue();
17617
17618   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17619
17620   unsigned Opcode = N0.getOpcode();
17621   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17622       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17623       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17624     return SDValue();
17625
17626   SDValue N00 = N0.getOperand(0);
17627   ArrayRef<int> Mask = SVN->getMask();
17628   unsigned NumElts = VT.getVectorNumElements();
17629   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17630   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17631   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17632
17633   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17634     return SDValue();
17635   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17636
17637   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
17638   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17639   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17640   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17641     for (unsigned i = 0; i != NumElts; ++i) {
17642       if (Mask[i] < 0)
17643         continue;
17644       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17645         continue;
17646       return false;
17647     }
17648     return true;
17649   };
17650
17651   // At the moment we just handle the case where we've truncated back to the
17652   // same size as before the extension.
17653   // TODO: handle more extension/truncation cases as cases arise.
17654   if (EltSizeInBits != ExtSrcSizeInBits)
17655     return SDValue();
17656
17657   // We can remove *extend_vector_inreg only if the truncation happens at
17658   // the same scale as the extension.
17659   if (isTruncate(ExtScale))
17660     return DAG.getBitcast(VT, N00);
17661
17662   return SDValue();
17663 }
17664
17665 // Combine shuffles of splat-shuffles of the form:
17666 // shuffle (shuffle V, undef, splat-mask), undef, M
17667 // If splat-mask contains undef elements, we need to be careful about
17668 // introducing undef's in the folded mask which are not the result of composing
17669 // the masks of the shuffles.
17670 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
17671                                      ShuffleVectorSDNode *Splat,
17672                                      SelectionDAG &DAG) {
17673   ArrayRef<int> SplatMask = Splat->getMask();
17674   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
17675
17676   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
17677   // every undef mask element in the splat-shuffle has a corresponding undef
17678   // element in the user-shuffle's mask or if the composition of mask elements
17679   // would result in undef.
17680   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
17681   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
17682   //   In this case it is not legal to simplify to the splat-shuffle because we
17683   //   may be exposing the users of the shuffle an undef element at index 1
17684   //   which was not there before the combine.
17685   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
17686   //   In this case the composition of masks yields SplatMask, so it's ok to
17687   //   simplify to the splat-shuffle.
17688   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
17689   //   In this case the composed mask includes all undef elements of SplatMask
17690   //   and in addition sets element zero to undef. It is safe to simplify to
17691   //   the splat-shuffle.
17692   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
17693                                        ArrayRef<int> SplatMask) {
17694     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
17695       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
17696           SplatMask[UserMask[i]] != -1)
17697         return false;
17698     return true;
17699   };
17700   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
17701     return SDValue(Splat, 0);
17702
17703   // Create a new shuffle with a mask that is composed of the two shuffles'
17704   // masks.
17705   SmallVector<int, 32> NewMask;
17706   for (int Idx : UserMask)
17707     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
17708
17709   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
17710                               Splat->getOperand(0), Splat->getOperand(1),
17711                               NewMask);
17712 }
17713
17714 /// If the shuffle mask is taking exactly one element from the first vector
17715 /// operand and passing through all other elements from the second vector
17716 /// operand, return the index of the mask element that is choosing an element
17717 /// from the first operand. Otherwise, return -1.
17718 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17719   int MaskSize = Mask.size();
17720   int EltFromOp0 = -1;
17721   // TODO: This does not match if there are undef elements in the shuffle mask.
17722   // Should we ignore undefs in the shuffle mask instead? The trade-off is
17723   // removing an instruction (a shuffle), but losing the knowledge that some
17724   // vector lanes are not needed.
17725   for (int i = 0; i != MaskSize; ++i) {
17726     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17727       // We're looking for a shuffle of exactly one element from operand 0.
17728       if (EltFromOp0 != -1)
17729         return -1;
17730       EltFromOp0 = i;
17731     } else if (Mask[i] != i + MaskSize) {
17732       // Nothing from operand 1 can change lanes.
17733       return -1;
17734     }
17735   }
17736   return EltFromOp0;
17737 }
17738
17739 /// If a shuffle inserts exactly one element from a source vector operand into
17740 /// another vector operand and we can access the specified element as a scalar,
17741 /// then we can eliminate the shuffle.
17742 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
17743                                       SelectionDAG &DAG) {
17744   // First, check if we are taking one element of a vector and shuffling that
17745   // element into another vector.
17746   ArrayRef<int> Mask = Shuf->getMask();
17747   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
17748   SDValue Op0 = Shuf->getOperand(0);
17749   SDValue Op1 = Shuf->getOperand(1);
17750   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
17751   if (ShufOp0Index == -1) {
17752     // Commute mask and check again.
17753     ShuffleVectorSDNode::commuteMask(CommutedMask);
17754     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
17755     if (ShufOp0Index == -1)
17756       return SDValue();
17757     // Commute operands to match the commuted shuffle mask.
17758     std::swap(Op0, Op1);
17759     Mask = CommutedMask;
17760   }
17761
17762   // The shuffle inserts exactly one element from operand 0 into operand 1.
17763   // Now see if we can access that element as a scalar via a real insert element
17764   // instruction.
17765   // TODO: We can try harder to locate the element as a scalar. Examples: it
17766   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
17767   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
17768          "Shuffle mask value must be from operand 0");
17769   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
17770     return SDValue();
17771
17772   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
17773   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
17774     return SDValue();
17775
17776   // There's an existing insertelement with constant insertion index, so we
17777   // don't need to check the legality/profitability of a replacement operation
17778   // that differs at most in the constant value. The target should be able to
17779   // lower any of those in a similar way. If not, legalization will expand this
17780   // to a scalar-to-vector plus shuffle.
17781   //
17782   // Note that the shuffle may move the scalar from the position that the insert
17783   // element used. Therefore, our new insert element occurs at the shuffle's
17784   // mask index value, not the insert's index value.
17785   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
17786   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
17787                                         Op0.getOperand(2).getValueType());
17788   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
17789                      Op1, Op0.getOperand(1), NewInsIndex);
17790 }
17791
17792 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
17793   EVT VT = N->getValueType(0);
17794   unsigned NumElts = VT.getVectorNumElements();
17795
17796   SDValue N0 = N->getOperand(0);
17797   SDValue N1 = N->getOperand(1);
17798
17799   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
17800
17801   // Canonicalize shuffle undef, undef -> undef
17802   if (N0.isUndef() && N1.isUndef())
17803     return DAG.getUNDEF(VT);
17804
17805   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17806
17807   // Canonicalize shuffle v, v -> v, undef
17808   if (N0 == N1) {
17809     SmallVector<int, 8> NewMask;
17810     for (unsigned i = 0; i != NumElts; ++i) {
17811       int Idx = SVN->getMaskElt(i);
17812       if (Idx >= (int)NumElts) Idx -= NumElts;
17813       NewMask.push_back(Idx);
17814     }
17815     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
17816   }
17817
17818   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
17819   if (N0.isUndef())
17820     return DAG.getCommutedVectorShuffle(*SVN);
17821
17822   // Remove references to rhs if it is undef
17823   if (N1.isUndef()) {
17824     bool Changed = false;
17825     SmallVector<int, 8> NewMask;
17826     for (unsigned i = 0; i != NumElts; ++i) {
17827       int Idx = SVN->getMaskElt(i);
17828       if (Idx >= (int)NumElts) {
17829         Idx = -1;
17830         Changed = true;
17831       }
17832       NewMask.push_back(Idx);
17833     }
17834     if (Changed)
17835       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
17836   }
17837
17838   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
17839     return InsElt;
17840
17841   // A shuffle of a single vector that is a splat can always be folded.
17842   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
17843     if (N1->isUndef() && N0Shuf->isSplat())
17844       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
17845
17846   // If it is a splat, check if the argument vector is another splat or a
17847   // build_vector.
17848   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
17849     SDNode *V = N0.getNode();
17850
17851     // If this is a bit convert that changes the element type of the vector but
17852     // not the number of vector elements, look through it.  Be careful not to
17853     // look though conversions that change things like v4f32 to v2f64.
17854     if (V->getOpcode() == ISD::BITCAST) {
17855       SDValue ConvInput = V->getOperand(0);
17856       if (ConvInput.getValueType().isVector() &&
17857           ConvInput.getValueType().getVectorNumElements() == NumElts)
17858         V = ConvInput.getNode();
17859     }
17860
17861     if (V->getOpcode() == ISD::BUILD_VECTOR) {
17862       assert(V->getNumOperands() == NumElts &&
17863              "BUILD_VECTOR has wrong number of operands");
17864       SDValue Base;
17865       bool AllSame = true;
17866       for (unsigned i = 0; i != NumElts; ++i) {
17867         if (!V->getOperand(i).isUndef()) {
17868           Base = V->getOperand(i);
17869           break;
17870         }
17871       }
17872       // Splat of <u, u, u, u>, return <u, u, u, u>
17873       if (!Base.getNode())
17874         return N0;
17875       for (unsigned i = 0; i != NumElts; ++i) {
17876         if (V->getOperand(i) != Base) {
17877           AllSame = false;
17878           break;
17879         }
17880       }
17881       // Splat of <x, x, x, x>, return <x, x, x, x>
17882       if (AllSame)
17883         return N0;
17884
17885       // Canonicalize any other splat as a build_vector.
17886       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17887       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17888       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17889
17890       // We may have jumped through bitcasts, so the type of the
17891       // BUILD_VECTOR may not match the type of the shuffle.
17892       if (V->getValueType(0) != VT)
17893         NewBV = DAG.getBitcast(VT, NewBV);
17894       return NewBV;
17895     }
17896   }
17897
17898   // Simplify source operands based on shuffle mask.
17899   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17900     return SDValue(N, 0);
17901
17902   // Match shuffles that can be converted to any_vector_extend_in_reg.
17903   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
17904     return V;
17905
17906   // Combine "truncate_vector_in_reg" style shuffles.
17907   if (SDValue V = combineTruncationShuffle(SVN, DAG))
17908     return V;
17909
17910   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17911       Level < AfterLegalizeVectorOps &&
17912       (N1.isUndef() ||
17913       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17914        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17915     if (SDValue V = partitionShuffleOfConcats(N, DAG))
17916       return V;
17917   }
17918
17919   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17920   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17921   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
17922     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17923       return Res;
17924
17925   // If this shuffle only has a single input that is a bitcasted shuffle,
17926   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17927   // back to their original types.
17928   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17929       N1.isUndef() && Level < AfterLegalizeVectorOps &&
17930       TLI.isTypeLegal(VT)) {
17931     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17932       if (Scale == 1)
17933         return SmallVector<int, 8>(Mask.begin(), Mask.end());
17934
17935       SmallVector<int, 8> NewMask;
17936       for (int M : Mask)
17937         for (int s = 0; s != Scale; ++s)
17938           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17939       return NewMask;
17940     };
17941
17942     SDValue BC0 = peekThroughOneUseBitcasts(N0);
17943     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17944       EVT SVT = VT.getScalarType();
17945       EVT InnerVT = BC0->getValueType(0);
17946       EVT InnerSVT = InnerVT.getScalarType();
17947
17948       // Determine which shuffle works with the smaller scalar type.
17949       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17950       EVT ScaleSVT = ScaleVT.getScalarType();
17951
17952       if (TLI.isTypeLegal(ScaleVT) &&
17953           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17954           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17955         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17956         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17957
17958         // Scale the shuffle masks to the smaller scalar type.
17959         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17960         SmallVector<int, 8> InnerMask =
17961             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17962         SmallVector<int, 8> OuterMask =
17963             ScaleShuffleMask(SVN->getMask(), OuterScale);
17964
17965         // Merge the shuffle masks.
17966         SmallVector<int, 8> NewMask;
17967         for (int M : OuterMask)
17968           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17969
17970         // Test for shuffle mask legality over both commutations.
17971         SDValue SV0 = BC0->getOperand(0);
17972         SDValue SV1 = BC0->getOperand(1);
17973         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17974         if (!LegalMask) {
17975           std::swap(SV0, SV1);
17976           ShuffleVectorSDNode::commuteMask(NewMask);
17977           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17978         }
17979
17980         if (LegalMask) {
17981           SV0 = DAG.getBitcast(ScaleVT, SV0);
17982           SV1 = DAG.getBitcast(ScaleVT, SV1);
17983           return DAG.getBitcast(
17984               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17985         }
17986       }
17987     }
17988   }
17989
17990   // Canonicalize shuffles according to rules:
17991   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17992   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17993   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17994   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17995       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17996       TLI.isTypeLegal(VT)) {
17997     // The incoming shuffle must be of the same type as the result of the
17998     // current shuffle.
17999     assert(N1->getOperand(0).getValueType() == VT &&
18000            "Shuffle types don't match");
18001
18002     SDValue SV0 = N1->getOperand(0);
18003     SDValue SV1 = N1->getOperand(1);
18004     bool HasSameOp0 = N0 == SV0;
18005     bool IsSV1Undef = SV1.isUndef();
18006     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18007       // Commute the operands of this shuffle so that next rule
18008       // will trigger.
18009       return DAG.getCommutedVectorShuffle(*SVN);
18010   }
18011
18012   // Try to fold according to rules:
18013   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18014   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18015   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18016   // Don't try to fold shuffles with illegal type.
18017   // Only fold if this shuffle is the only user of the other shuffle.
18018   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18019       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18020     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18021
18022     // Don't try to fold splats; they're likely to simplify somehow, or they
18023     // might be free.
18024     if (OtherSV->isSplat())
18025       return SDValue();
18026
18027     // The incoming shuffle must be of the same type as the result of the
18028     // current shuffle.
18029     assert(OtherSV->getOperand(0).getValueType() == VT &&
18030            "Shuffle types don't match");
18031
18032     SDValue SV0, SV1;
18033     SmallVector<int, 4> Mask;
18034     // Compute the combined shuffle mask for a shuffle with SV0 as the first
18035     // operand, and SV1 as the second operand.
18036     for (unsigned i = 0; i != NumElts; ++i) {
18037       int Idx = SVN->getMaskElt(i);
18038       if (Idx < 0) {
18039         // Propagate Undef.
18040         Mask.push_back(Idx);
18041         continue;
18042       }
18043
18044       SDValue CurrentVec;
18045       if (Idx < (int)NumElts) {
18046         // This shuffle index refers to the inner shuffle N0. Lookup the inner
18047         // shuffle mask to identify which vector is actually referenced.
18048         Idx = OtherSV->getMaskElt(Idx);
18049         if (Idx < 0) {
18050           // Propagate Undef.
18051           Mask.push_back(Idx);
18052           continue;
18053         }
18054
18055         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18056                                            : OtherSV->getOperand(1);
18057       } else {
18058         // This shuffle index references an element within N1.
18059         CurrentVec = N1;
18060       }
18061
18062       // Simple case where 'CurrentVec' is UNDEF.
18063       if (CurrentVec.isUndef()) {
18064         Mask.push_back(-1);
18065         continue;
18066       }
18067
18068       // Canonicalize the shuffle index. We don't know yet if CurrentVec
18069       // will be the first or second operand of the combined shuffle.
18070       Idx = Idx % NumElts;
18071       if (!SV0.getNode() || SV0 == CurrentVec) {
18072         // Ok. CurrentVec is the left hand side.
18073         // Update the mask accordingly.
18074         SV0 = CurrentVec;
18075         Mask.push_back(Idx);
18076         continue;
18077       }
18078
18079       // Bail out if we cannot convert the shuffle pair into a single shuffle.
18080       if (SV1.getNode() && SV1 != CurrentVec)
18081         return SDValue();
18082
18083       // Ok. CurrentVec is the right hand side.
18084       // Update the mask accordingly.
18085       SV1 = CurrentVec;
18086       Mask.push_back(Idx + NumElts);
18087     }
18088
18089     // Check if all indices in Mask are Undef. In case, propagate Undef.
18090     bool isUndefMask = true;
18091     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
18092       isUndefMask &= Mask[i] < 0;
18093
18094     if (isUndefMask)
18095       return DAG.getUNDEF(VT);
18096
18097     if (!SV0.getNode())
18098       SV0 = DAG.getUNDEF(VT);
18099     if (!SV1.getNode())
18100       SV1 = DAG.getUNDEF(VT);
18101
18102     // Avoid introducing shuffles with illegal mask.
18103     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
18104       ShuffleVectorSDNode::commuteMask(Mask);
18105
18106       if (!TLI.isShuffleMaskLegal(Mask, VT))
18107         return SDValue();
18108
18109       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
18110       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
18111       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
18112       std::swap(SV0, SV1);
18113     }
18114
18115     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18116     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18117     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18118     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
18119   }
18120
18121   return SDValue();
18122 }
18123
18124 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
18125   SDValue InVal = N->getOperand(0);
18126   EVT VT = N->getValueType(0);
18127
18128   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
18129   // with a VECTOR_SHUFFLE and possible truncate.
18130   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18131     SDValue InVec = InVal->getOperand(0);
18132     SDValue EltNo = InVal->getOperand(1);
18133     auto InVecT = InVec.getValueType();
18134     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
18135       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
18136       int Elt = C0->getZExtValue();
18137       NewMask[0] = Elt;
18138       SDValue Val;
18139       // If we have an implict truncate do truncate here as long as it's legal.
18140       // if it's not legal, this should
18141       if (VT.getScalarType() != InVal.getValueType() &&
18142           InVal.getValueType().isScalarInteger() &&
18143           isTypeLegal(VT.getScalarType())) {
18144         Val =
18145             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
18146         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
18147       }
18148       if (VT.getScalarType() == InVecT.getScalarType() &&
18149           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
18150           TLI.isShuffleMaskLegal(NewMask, VT)) {
18151         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
18152                                    DAG.getUNDEF(InVecT), NewMask);
18153         // If the initial vector is the correct size this shuffle is a
18154         // valid result.
18155         if (VT == InVecT)
18156           return Val;
18157         // If not we must truncate the vector.
18158         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
18159           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
18160           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
18161           EVT SubVT =
18162               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
18163                                VT.getVectorNumElements());
18164           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
18165                             ZeroIdx);
18166           return Val;
18167         }
18168       }
18169     }
18170   }
18171
18172   return SDValue();
18173 }
18174
18175 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
18176   EVT VT = N->getValueType(0);
18177   SDValue N0 = N->getOperand(0);
18178   SDValue N1 = N->getOperand(1);
18179   SDValue N2 = N->getOperand(2);
18180
18181   // If inserting an UNDEF, just return the original vector.
18182   if (N1.isUndef())
18183     return N0;
18184
18185   // If this is an insert of an extracted vector into an undef vector, we can
18186   // just use the input to the extract.
18187   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18188       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
18189     return N1.getOperand(0);
18190
18191   // If we are inserting a bitcast value into an undef, with the same
18192   // number of elements, just use the bitcast input of the extract.
18193   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
18194   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
18195   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
18196       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18197       N1.getOperand(0).getOperand(1) == N2 &&
18198       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
18199           VT.getVectorNumElements() &&
18200       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
18201           VT.getSizeInBits()) {
18202     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
18203   }
18204
18205   // If both N1 and N2 are bitcast values on which insert_subvector
18206   // would makes sense, pull the bitcast through.
18207   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
18208   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
18209   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
18210     SDValue CN0 = N0.getOperand(0);
18211     SDValue CN1 = N1.getOperand(0);
18212     EVT CN0VT = CN0.getValueType();
18213     EVT CN1VT = CN1.getValueType();
18214     if (CN0VT.isVector() && CN1VT.isVector() &&
18215         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
18216         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
18217       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
18218                                       CN0.getValueType(), CN0, CN1, N2);
18219       return DAG.getBitcast(VT, NewINSERT);
18220     }
18221   }
18222
18223   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
18224   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
18225   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
18226   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
18227       N0.getOperand(1).getValueType() == N1.getValueType() &&
18228       N0.getOperand(2) == N2)
18229     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18230                        N1, N2);
18231
18232   // Eliminate an intermediate insert into an undef vector:
18233   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18234   // insert_subvector undef, X, N2
18235   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18236       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18237     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18238                        N1.getOperand(1), N2);
18239
18240   if (!isa<ConstantSDNode>(N2))
18241     return SDValue();
18242
18243   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18244
18245   // Canonicalize insert_subvector dag nodes.
18246   // Example:
18247   // (insert_subvector (insert_subvector A, Idx0), Idx1)
18248   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18249   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18250       N1.getValueType() == N0.getOperand(1).getValueType() &&
18251       isa<ConstantSDNode>(N0.getOperand(2))) {
18252     unsigned OtherIdx = N0.getConstantOperandVal(2);
18253     if (InsIdx < OtherIdx) {
18254       // Swap nodes.
18255       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18256                                   N0.getOperand(0), N1, N2);
18257       AddToWorklist(NewOp.getNode());
18258       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18259                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18260     }
18261   }
18262
18263   // If the input vector is a concatenation, and the insert replaces
18264   // one of the pieces, we can optimize into a single concat_vectors.
18265   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18266       N0.getOperand(0).getValueType() == N1.getValueType()) {
18267     unsigned Factor = N1.getValueType().getVectorNumElements();
18268
18269     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18270     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18271
18272     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18273   }
18274
18275   // Simplify source operands based on insertion.
18276   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18277     return SDValue(N, 0);
18278
18279   return SDValue();
18280 }
18281
18282 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18283   SDValue N0 = N->getOperand(0);
18284
18285   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18286   if (N0->getOpcode() == ISD::FP16_TO_FP)
18287     return N0->getOperand(0);
18288
18289   return SDValue();
18290 }
18291
18292 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18293   SDValue N0 = N->getOperand(0);
18294
18295   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18296   if (N0->getOpcode() == ISD::AND) {
18297     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18298     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18299       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18300                          N0.getOperand(0));
18301     }
18302   }
18303
18304   return SDValue();
18305 }
18306
18307 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
18308 /// with the destination vector and a zero vector.
18309 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
18310 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
18311 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18312   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18313
18314   EVT VT = N->getValueType(0);
18315   SDValue LHS = N->getOperand(0);
18316   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18317   SDLoc DL(N);
18318
18319   // Make sure we're not running after operation legalization where it
18320   // may have custom lowered the vector shuffles.
18321   if (LegalOperations)
18322     return SDValue();
18323
18324   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18325     return SDValue();
18326
18327   EVT RVT = RHS.getValueType();
18328   unsigned NumElts = RHS.getNumOperands();
18329
18330   // Attempt to create a valid clear mask, splitting the mask into
18331   // sub elements and checking to see if each is
18332   // all zeros or all ones - suitable for shuffle masking.
18333   auto BuildClearMask = [&](int Split) {
18334     int NumSubElts = NumElts * Split;
18335     int NumSubBits = RVT.getScalarSizeInBits() / Split;
18336
18337     SmallVector<int, 8> Indices;
18338     for (int i = 0; i != NumSubElts; ++i) {
18339       int EltIdx = i / Split;
18340       int SubIdx = i % Split;
18341       SDValue Elt = RHS.getOperand(EltIdx);
18342       if (Elt.isUndef()) {
18343         Indices.push_back(-1);
18344         continue;
18345       }
18346
18347       APInt Bits;
18348       if (isa<ConstantSDNode>(Elt))
18349         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18350       else if (isa<ConstantFPSDNode>(Elt))
18351         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18352       else
18353         return SDValue();
18354
18355       // Extract the sub element from the constant bit mask.
18356       if (DAG.getDataLayout().isBigEndian()) {
18357         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18358       } else {
18359         Bits.lshrInPlace(SubIdx * NumSubBits);
18360       }
18361
18362       if (Split > 1)
18363         Bits = Bits.trunc(NumSubBits);
18364
18365       if (Bits.isAllOnesValue())
18366         Indices.push_back(i);
18367       else if (Bits == 0)
18368         Indices.push_back(i + NumSubElts);
18369       else
18370         return SDValue();
18371     }
18372
18373     // Let's see if the target supports this vector_shuffle.
18374     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18375     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18376     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18377       return SDValue();
18378
18379     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18380     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18381                                                    DAG.getBitcast(ClearVT, LHS),
18382                                                    Zero, Indices));
18383   };
18384
18385   // Determine maximum split level (byte level masking).
18386   int MaxSplit = 1;
18387   if (RVT.getScalarSizeInBits() % 8 == 0)
18388     MaxSplit = RVT.getScalarSizeInBits() / 8;
18389
18390   for (int Split = 1; Split <= MaxSplit; ++Split)
18391     if (RVT.getScalarSizeInBits() % Split == 0)
18392       if (SDValue S = BuildClearMask(Split))
18393         return S;
18394
18395   return SDValue();
18396 }
18397
18398 /// Visit a binary vector operation, like ADD.
18399 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18400   assert(N->getValueType(0).isVector() &&
18401          "SimplifyVBinOp only works on vectors!");
18402
18403   SDValue LHS = N->getOperand(0);
18404   SDValue RHS = N->getOperand(1);
18405   SDValue Ops[] = {LHS, RHS};
18406   EVT VT = N->getValueType(0);
18407
18408   // See if we can constant fold the vector operation.
18409   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18410           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18411     return Fold;
18412
18413   // Type legalization might introduce new shuffles in the DAG.
18414   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
18415   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
18416   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
18417       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
18418       LHS.getOperand(1).isUndef() &&
18419       RHS.getOperand(1).isUndef()) {
18420     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
18421     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
18422
18423     if (SVN0->getMask().equals(SVN1->getMask())) {
18424       SDValue UndefVector = LHS.getOperand(1);
18425       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
18426                                      LHS.getOperand(0), RHS.getOperand(0),
18427                                      N->getFlags());
18428       AddUsersToWorklist(N);
18429       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
18430                                   SVN0->getMask());
18431     }
18432   }
18433
18434   // The following pattern is likely to emerge with vector reduction ops. Moving
18435   // the binary operation ahead of insertion may allow using a narrower vector
18436   // instruction that has better performance than the wide version of the op:
18437   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
18438   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
18439       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
18440       LHS.getOperand(2) == RHS.getOperand(2) &&
18441       (LHS.hasOneUse() || RHS.hasOneUse())) {
18442     SDValue X = LHS.getOperand(1);
18443     SDValue Y = RHS.getOperand(1);
18444     SDValue Z = LHS.getOperand(2);
18445     EVT NarrowVT = X.getValueType();
18446     if (NarrowVT == Y.getValueType() &&
18447         TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), NarrowVT)) {
18448       // (binop undef, undef) may not return undef, so compute that result.
18449       SDLoc DL(N);
18450       SDValue VecC = DAG.getNode(N->getOpcode(), DL, VT, DAG.getUNDEF(VT),
18451                                  DAG.getUNDEF(VT));
18452       SDValue NarrowBO = DAG.getNode(N->getOpcode(), DL, NarrowVT, X, Y);
18453       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
18454     }
18455   }
18456
18457   return SDValue();
18458 }
18459
18460 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18461                                     SDValue N2) {
18462   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
18463
18464   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18465                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
18466
18467   // If we got a simplified select_cc node back from SimplifySelectCC, then
18468   // break it down into a new SETCC node, and a new SELECT node, and then return
18469   // the SELECT node, since we were called with a SELECT node.
18470   if (SCC.getNode()) {
18471     // Check to see if we got a select_cc back (to turn into setcc/select).
18472     // Otherwise, just return whatever node we got back, like fabs.
18473     if (SCC.getOpcode() == ISD::SELECT_CC) {
18474       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18475                                   N0.getValueType(),
18476                                   SCC.getOperand(0), SCC.getOperand(1),
18477                                   SCC.getOperand(4));
18478       AddToWorklist(SETCC.getNode());
18479       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18480                            SCC.getOperand(2), SCC.getOperand(3));
18481     }
18482
18483     return SCC;
18484   }
18485   return SDValue();
18486 }
18487
18488 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18489 /// being selected between, see if we can simplify the select.  Callers of this
18490 /// should assume that TheSelect is deleted if this returns true.  As such, they
18491 /// should return the appropriate thing (e.g. the node) back to the top-level of
18492 /// the DAG combiner loop to avoid it being looked at.
18493 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18494                                     SDValue RHS) {
18495   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18496   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18497   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18498     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18499       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18500       SDValue Sqrt = RHS;
18501       ISD::CondCode CC;
18502       SDValue CmpLHS;
18503       const ConstantFPSDNode *Zero = nullptr;
18504
18505       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18506         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18507         CmpLHS = TheSelect->getOperand(0);
18508         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18509       } else {
18510         // SELECT or VSELECT
18511         SDValue Cmp = TheSelect->getOperand(0);
18512         if (Cmp.getOpcode() == ISD::SETCC) {
18513           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18514           CmpLHS = Cmp.getOperand(0);
18515           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18516         }
18517       }
18518       if (Zero && Zero->isZero() &&
18519           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18520           CC == ISD::SETULT || CC == ISD::SETLT)) {
18521         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18522         CombineTo(TheSelect, Sqrt);
18523         return true;
18524       }
18525     }
18526   }
18527   // Cannot simplify select with vector condition
18528   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
18529
18530   // If this is a select from two identical things, try to pull the operation
18531   // through the select.
18532   if (LHS.getOpcode() != RHS.getOpcode() ||
18533       !LHS.hasOneUse() || !RHS.hasOneUse())
18534     return false;
18535
18536   // If this is a load and the token chain is identical, replace the select
18537   // of two loads with a load through a select of the address to load from.
18538   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
18539   // constants have been dropped into the constant pool.
18540   if (LHS.getOpcode() == ISD::LOAD) {
18541     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
18542     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
18543
18544     // Token chains must be identical.
18545     if (LHS.getOperand(0) != RHS.getOperand(0) ||
18546         // Do not let this transformation reduce the number of volatile loads.
18547         LLD->isVolatile() || RLD->isVolatile() ||
18548         // FIXME: If either is a pre/post inc/dec load,
18549         // we'd need to split out the address adjustment.
18550         LLD->isIndexed() || RLD->isIndexed() ||
18551         // If this is an EXTLOAD, the VT's must match.
18552         LLD->getMemoryVT() != RLD->getMemoryVT() ||
18553         // If this is an EXTLOAD, the kind of extension must match.
18554         (LLD->getExtensionType() != RLD->getExtensionType() &&
18555          // The only exception is if one of the extensions is anyext.
18556          LLD->getExtensionType() != ISD::EXTLOAD &&
18557          RLD->getExtensionType() != ISD::EXTLOAD) ||
18558         // FIXME: this discards src value information.  This is
18559         // over-conservative. It would be beneficial to be able to remember
18560         // both potential memory locations.  Since we are discarding
18561         // src value info, don't do the transformation if the memory
18562         // locations are not in the default address space.
18563         LLD->getPointerInfo().getAddrSpace() != 0 ||
18564         RLD->getPointerInfo().getAddrSpace() != 0 ||
18565         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
18566                                       LLD->getBasePtr().getValueType()))
18567       return false;
18568
18569     // The loads must not depend on one another.
18570     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
18571       return false;
18572
18573     // Check that the select condition doesn't reach either load.  If so,
18574     // folding this will induce a cycle into the DAG.  If not, this is safe to
18575     // xform, so create a select of the addresses.
18576
18577     SmallPtrSet<const SDNode *, 32> Visited;
18578     SmallVector<const SDNode *, 16> Worklist;
18579
18580     // Always fail if LLD and RLD are not independent. TheSelect is a
18581     // predecessor to all Nodes in question so we need not search past it.
18582
18583     Visited.insert(TheSelect);
18584     Worklist.push_back(LLD);
18585     Worklist.push_back(RLD);
18586
18587     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
18588         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
18589       return false;
18590
18591     SDValue Addr;
18592     if (TheSelect->getOpcode() == ISD::SELECT) {
18593       // We cannot do this optimization if any pair of {RLD, LLD} is a
18594       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
18595       // Loads, we only need to check if CondNode is a successor to one of the
18596       // loads. We can further avoid this if there's no use of their chain
18597       // value.
18598       SDNode *CondNode = TheSelect->getOperand(0).getNode();
18599       Worklist.push_back(CondNode);
18600
18601       if ((LLD->hasAnyUseOfValue(1) &&
18602            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18603           (RLD->hasAnyUseOfValue(1) &&
18604            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18605         return false;
18606
18607       Addr = DAG.getSelect(SDLoc(TheSelect),
18608                            LLD->getBasePtr().getValueType(),
18609                            TheSelect->getOperand(0), LLD->getBasePtr(),
18610                            RLD->getBasePtr());
18611     } else {  // Otherwise SELECT_CC
18612       // We cannot do this optimization if any pair of {RLD, LLD} is a
18613       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
18614       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
18615       // one of the loads. We can further avoid this if there's no use of their
18616       // chain value.
18617
18618       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18619       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18620       Worklist.push_back(CondLHS);
18621       Worklist.push_back(CondRHS);
18622
18623       if ((LLD->hasAnyUseOfValue(1) &&
18624            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18625           (RLD->hasAnyUseOfValue(1) &&
18626            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18627         return false;
18628
18629       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18630                          LLD->getBasePtr().getValueType(),
18631                          TheSelect->getOperand(0),
18632                          TheSelect->getOperand(1),
18633                          LLD->getBasePtr(), RLD->getBasePtr(),
18634                          TheSelect->getOperand(4));
18635     }
18636
18637     SDValue Load;
18638     // It is safe to replace the two loads if they have different alignments,
18639     // but the new load must be the minimum (most restrictive) alignment of the
18640     // inputs.
18641     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
18642     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18643     if (!RLD->isInvariant())
18644       MMOFlags &= ~MachineMemOperand::MOInvariant;
18645     if (!RLD->isDereferenceable())
18646       MMOFlags &= ~MachineMemOperand::MODereferenceable;
18647     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18648       // FIXME: Discards pointer and AA info.
18649       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18650                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18651                          MMOFlags);
18652     } else {
18653       // FIXME: Discards pointer and AA info.
18654       Load = DAG.getExtLoad(
18655           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18656                                                   : LLD->getExtensionType(),
18657           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18658           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18659     }
18660
18661     // Users of the select now use the result of the load.
18662     CombineTo(TheSelect, Load);
18663
18664     // Users of the old loads now use the new load's chain.  We know the
18665     // old-load value is dead now.
18666     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18667     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18668     return true;
18669   }
18670
18671   return false;
18672 }
18673
18674 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18675 /// bitwise 'and'.
18676 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18677                                             SDValue N1, SDValue N2, SDValue N3,
18678                                             ISD::CondCode CC) {
18679   // If this is a select where the false operand is zero and the compare is a
18680   // check of the sign bit, see if we can perform the "gzip trick":
18681   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18682   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
18683   EVT XType = N0.getValueType();
18684   EVT AType = N2.getValueType();
18685   if (!isNullConstant(N3) || !XType.bitsGE(AType))
18686     return SDValue();
18687
18688   // If the comparison is testing for a positive value, we have to invert
18689   // the sign bit mask, so only do that transform if the target has a bitwise
18690   // 'and not' instruction (the invert is free).
18691   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18692     // (X > -1) ? A : 0
18693     // (X >  0) ? X : 0 <-- This is canonical signed max.
18694     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18695       return SDValue();
18696   } else if (CC == ISD::SETLT) {
18697     // (X <  0) ? A : 0
18698     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
18699     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18700       return SDValue();
18701   } else {
18702     return SDValue();
18703   }
18704
18705   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18706   // constant.
18707   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18708   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18709   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18710     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18711     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18712     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18713     AddToWorklist(Shift.getNode());
18714
18715     if (XType.bitsGT(AType)) {
18716       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18717       AddToWorklist(Shift.getNode());
18718     }
18719
18720     if (CC == ISD::SETGT)
18721       Shift = DAG.getNOT(DL, Shift, AType);
18722
18723     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18724   }
18725
18726   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18727   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18728   AddToWorklist(Shift.getNode());
18729
18730   if (XType.bitsGT(AType)) {
18731     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18732     AddToWorklist(Shift.getNode());
18733   }
18734
18735   if (CC == ISD::SETGT)
18736     Shift = DAG.getNOT(DL, Shift, AType);
18737
18738   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18739 }
18740
18741 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
18742 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18743 /// in it. This may be a win when the constant is not otherwise available
18744 /// because it replaces two constant pool loads with one.
18745 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
18746     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
18747     ISD::CondCode CC) {
18748   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
18749     return SDValue();
18750
18751   // If we are before legalize types, we want the other legalization to happen
18752   // first (for example, to avoid messing with soft float).
18753   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
18754   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
18755   EVT VT = N2.getValueType();
18756   if (!TV || !FV || !TLI.isTypeLegal(VT))
18757     return SDValue();
18758
18759   // If a constant can be materialized without loads, this does not make sense.
18760   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
18761       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
18762       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
18763     return SDValue();
18764
18765   // If both constants have multiple uses, then we won't need to do an extra
18766   // load. The values are likely around in registers for other users.
18767   if (!TV->hasOneUse() && !FV->hasOneUse())
18768     return SDValue();
18769
18770   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
18771                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
18772   Type *FPTy = Elts[0]->getType();
18773   const DataLayout &TD = DAG.getDataLayout();
18774
18775   // Create a ConstantArray of the two constants.
18776   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18777   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18778                                       TD.getPrefTypeAlignment(FPTy));
18779   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18780
18781   // Get offsets to the 0 and 1 elements of the array, so we can select between
18782   // them.
18783   SDValue Zero = DAG.getIntPtrConstant(0, DL);
18784   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18785   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18786   SDValue Cond =
18787       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
18788   AddToWorklist(Cond.getNode());
18789   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
18790   AddToWorklist(CstOffset.getNode());
18791   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
18792   AddToWorklist(CPIdx.getNode());
18793   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18794                      MachinePointerInfo::getConstantPool(
18795                          DAG.getMachineFunction()), Alignment);
18796 }
18797
18798 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
18799 /// where 'cond' is the comparison specified by CC.
18800 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
18801                                       SDValue N2, SDValue N3, ISD::CondCode CC,
18802                                       bool NotExtCompare) {
18803   // (x ? y : y) -> y.
18804   if (N2 == N3) return N2;
18805
18806   EVT CmpOpVT = N0.getValueType();
18807   EVT VT = N2.getValueType();
18808   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
18809   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18810   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
18811
18812   // Determine if the condition we're dealing with is constant.
18813   SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
18814                               false);
18815   if (SCC.getNode()) AddToWorklist(SCC.getNode());
18816
18817   if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
18818     // fold select_cc true, x, y -> x
18819     // fold select_cc false, x, y -> y
18820     return !SCCC->isNullValue() ? N2 : N3;
18821   }
18822
18823   if (SDValue V =
18824           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
18825     return V;
18826
18827   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
18828     return V;
18829
18830   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
18831   // where y is has a single bit set.
18832   // A plaintext description would be, we can turn the SELECT_CC into an AND
18833   // when the condition can be materialized as an all-ones register.  Any
18834   // single bit-test can be materialized as an all-ones register with
18835   // shift-left and shift-right-arith.
18836   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
18837       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
18838     SDValue AndLHS = N0->getOperand(0);
18839     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18840     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
18841       // Shift the tested bit over the sign bit.
18842       const APInt &AndMask = ConstAndRHS->getAPIntValue();
18843       SDValue ShlAmt =
18844         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
18845                         getShiftAmountTy(AndLHS.getValueType()));
18846       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
18847
18848       // Now arithmetic right shift it all the way over, so the result is either
18849       // all-ones, or zero.
18850       SDValue ShrAmt =
18851         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
18852                         getShiftAmountTy(Shl.getValueType()));
18853       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
18854
18855       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
18856     }
18857   }
18858
18859   // fold select C, 16, 0 -> shl C, 4
18860   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
18861   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
18862
18863   if ((Fold || Swap) &&
18864       TLI.getBooleanContents(CmpOpVT) ==
18865           TargetLowering::ZeroOrOneBooleanContent &&
18866       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
18867
18868     if (Swap) {
18869       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
18870       std::swap(N2C, N3C);
18871     }
18872
18873     // If the caller doesn't want us to simplify this into a zext of a compare,
18874     // don't do it.
18875     if (NotExtCompare && N2C->isOne())
18876       return SDValue();
18877
18878     SDValue Temp, SCC;
18879     // zext (setcc n0, n1)
18880     if (LegalTypes) {
18881       SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
18882       if (VT.bitsLT(SCC.getValueType()))
18883         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
18884       else
18885         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18886     } else {
18887       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
18888       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18889     }
18890
18891     AddToWorklist(SCC.getNode());
18892     AddToWorklist(Temp.getNode());
18893
18894     if (N2C->isOne())
18895       return Temp;
18896
18897     // shl setcc result by log2 n2c
18898     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
18899                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
18900                                        SDLoc(Temp),
18901                                        getShiftAmountTy(Temp.getValueType())));
18902   }
18903
18904   // Check to see if this is an integer abs.
18905   // select_cc setg[te] X,  0,  X, -X ->
18906   // select_cc setgt    X, -1,  X, -X ->
18907   // select_cc setl[te] X,  0, -X,  X ->
18908   // select_cc setlt    X,  1, -X,  X ->
18909   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
18910   if (N1C) {
18911     ConstantSDNode *SubC = nullptr;
18912     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18913          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18914         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18915       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18916     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18917               (N1C->isOne() && CC == ISD::SETLT)) &&
18918              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18919       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18920
18921     if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
18922       SDLoc DL(N0);
18923       SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
18924                                   DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
18925                                                   DL,
18926                                                   getShiftAmountTy(CmpOpVT)));
18927       SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
18928       AddToWorklist(Shift.getNode());
18929       AddToWorklist(Add.getNode());
18930       return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
18931     }
18932   }
18933
18934   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18935   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18936   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18937   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18938   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18939   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18940   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18941   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18942   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18943     SDValue ValueOnZero = N2;
18944     SDValue Count = N3;
18945     // If the condition is NE instead of E, swap the operands.
18946     if (CC == ISD::SETNE)
18947       std::swap(ValueOnZero, Count);
18948     // Check if the value on zero is a constant equal to the bits in the type.
18949     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18950       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18951         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18952         // legal, combine to just cttz.
18953         if ((Count.getOpcode() == ISD::CTTZ ||
18954              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18955             N0 == Count.getOperand(0) &&
18956             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18957           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18958         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18959         // legal, combine to just ctlz.
18960         if ((Count.getOpcode() == ISD::CTLZ ||
18961              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18962             N0 == Count.getOperand(0) &&
18963             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18964           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18965       }
18966     }
18967   }
18968
18969   return SDValue();
18970 }
18971
18972 /// This is a stub for TargetLowering::SimplifySetCC.
18973 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18974                                    ISD::CondCode Cond, const SDLoc &DL,
18975                                    bool foldBooleans) {
18976   TargetLowering::DAGCombinerInfo
18977     DagCombineInfo(DAG, Level, false, this);
18978   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18979 }
18980
18981 /// Given an ISD::SDIV node expressing a divide by constant, return
18982 /// a DAG expression to select that will generate the same value by multiplying
18983 /// by a magic number.
18984 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18985 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18986   // when optimising for minimum size, we don't want to expand a div to a mul
18987   // and a shift.
18988   if (DAG.getMachineFunction().getFunction().optForMinSize())
18989     return SDValue();
18990
18991   SmallVector<SDNode *, 8> Built;
18992   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18993     for (SDNode *N : Built)
18994       AddToWorklist(N);
18995     return S;
18996   }
18997
18998   return SDValue();
18999 }
19000
19001 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
19002 /// DAG expression that will generate the same value by right shifting.
19003 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
19004   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
19005   if (!C)
19006     return SDValue();
19007
19008   // Avoid division by zero.
19009   if (C->isNullValue())
19010     return SDValue();
19011
19012   SmallVector<SDNode *, 8> Built;
19013   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
19014     for (SDNode *N : Built)
19015       AddToWorklist(N);
19016     return S;
19017   }
19018
19019   return SDValue();
19020 }
19021
19022 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
19023 /// expression that will generate the same value by multiplying by a magic
19024 /// number.
19025 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
19026 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
19027   // when optimising for minimum size, we don't want to expand a div to a mul
19028   // and a shift.
19029   if (DAG.getMachineFunction().getFunction().optForMinSize())
19030     return SDValue();
19031
19032   SmallVector<SDNode *, 8> Built;
19033   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
19034     for (SDNode *N : Built)
19035       AddToWorklist(N);
19036     return S;
19037   }
19038
19039   return SDValue();
19040 }
19041
19042 /// Determines the LogBase2 value for a non-null input value using the
19043 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
19044 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
19045   EVT VT = V.getValueType();
19046   unsigned EltBits = VT.getScalarSizeInBits();
19047   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
19048   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
19049   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
19050   return LogBase2;
19051 }
19052
19053 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19054 /// For the reciprocal, we need to find the zero of the function:
19055 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
19056 ///     =>
19057 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
19058 ///     does not require additional intermediate precision]
19059 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
19060   if (Level >= AfterLegalizeDAG)
19061     return SDValue();
19062
19063   // TODO: Handle half and/or extended types?
19064   EVT VT = Op.getValueType();
19065   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19066     return SDValue();
19067
19068   // If estimates are explicitly disabled for this function, we're done.
19069   MachineFunction &MF = DAG.getMachineFunction();
19070   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
19071   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19072     return SDValue();
19073
19074   // Estimates may be explicitly enabled for this type with a custom number of
19075   // refinement steps.
19076   int Iterations = TLI.getDivRefinementSteps(VT, MF);
19077   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
19078     AddToWorklist(Est.getNode());
19079
19080     if (Iterations) {
19081       EVT VT = Op.getValueType();
19082       SDLoc DL(Op);
19083       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
19084
19085       // Newton iterations: Est = Est + Est (1 - Arg * Est)
19086       for (int i = 0; i < Iterations; ++i) {
19087         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
19088         AddToWorklist(NewEst.getNode());
19089
19090         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
19091         AddToWorklist(NewEst.getNode());
19092
19093         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19094         AddToWorklist(NewEst.getNode());
19095
19096         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
19097         AddToWorklist(Est.getNode());
19098       }
19099     }
19100     return Est;
19101   }
19102
19103   return SDValue();
19104 }
19105
19106 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19107 /// For the reciprocal sqrt, we need to find the zero of the function:
19108 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19109 ///     =>
19110 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
19111 /// As a result, we precompute A/2 prior to the iteration loop.
19112 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
19113                                          unsigned Iterations,
19114                                          SDNodeFlags Flags, bool Reciprocal) {
19115   EVT VT = Arg.getValueType();
19116   SDLoc DL(Arg);
19117   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
19118
19119   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
19120   // this entire sequence requires only one FP constant.
19121   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
19122   AddToWorklist(HalfArg.getNode());
19123
19124   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
19125   AddToWorklist(HalfArg.getNode());
19126
19127   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
19128   for (unsigned i = 0; i < Iterations; ++i) {
19129     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
19130     AddToWorklist(NewEst.getNode());
19131
19132     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
19133     AddToWorklist(NewEst.getNode());
19134
19135     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
19136     AddToWorklist(NewEst.getNode());
19137
19138     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19139     AddToWorklist(Est.getNode());
19140   }
19141
19142   // If non-reciprocal square root is requested, multiply the result by Arg.
19143   if (!Reciprocal) {
19144     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
19145     AddToWorklist(Est.getNode());
19146   }
19147
19148   return Est;
19149 }
19150
19151 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19152 /// For the reciprocal sqrt, we need to find the zero of the function:
19153 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19154 ///     =>
19155 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
19156 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
19157                                          unsigned Iterations,
19158                                          SDNodeFlags Flags, bool Reciprocal) {
19159   EVT VT = Arg.getValueType();
19160   SDLoc DL(Arg);
19161   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
19162   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
19163
19164   // This routine must enter the loop below to work correctly
19165   // when (Reciprocal == false).
19166   assert(Iterations > 0);
19167
19168   // Newton iterations for reciprocal square root:
19169   // E = (E * -0.5) * ((A * E) * E + -3.0)
19170   for (unsigned i = 0; i < Iterations; ++i) {
19171     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
19172     AddToWorklist(AE.getNode());
19173
19174     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
19175     AddToWorklist(AEE.getNode());
19176
19177     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
19178     AddToWorklist(RHS.getNode());
19179
19180     // When calculating a square root at the last iteration build:
19181     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
19182     // (notice a common subexpression)
19183     SDValue LHS;
19184     if (Reciprocal || (i + 1) < Iterations) {
19185       // RSQRT: LHS = (E * -0.5)
19186       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
19187     } else {
19188       // SQRT: LHS = (A * E) * -0.5
19189       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
19190     }
19191     AddToWorklist(LHS.getNode());
19192
19193     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
19194     AddToWorklist(Est.getNode());
19195   }
19196
19197   return Est;
19198 }
19199
19200 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
19201 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
19202 /// Op can be zero.
19203 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
19204                                            bool Reciprocal) {
19205   if (Level >= AfterLegalizeDAG)
19206     return SDValue();
19207
19208   // TODO: Handle half and/or extended types?
19209   EVT VT = Op.getValueType();
19210   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19211     return SDValue();
19212
19213   // If estimates are explicitly disabled for this function, we're done.
19214   MachineFunction &MF = DAG.getMachineFunction();
19215   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
19216   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19217     return SDValue();
19218
19219   // Estimates may be explicitly enabled for this type with a custom number of
19220   // refinement steps.
19221   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
19222
19223   bool UseOneConstNR = false;
19224   if (SDValue Est =
19225       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
19226                           Reciprocal)) {
19227     AddToWorklist(Est.getNode());
19228
19229     if (Iterations) {
19230       Est = UseOneConstNR
19231             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
19232             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
19233
19234       if (!Reciprocal) {
19235         // The estimate is now completely wrong if the input was exactly 0.0 or
19236         // possibly a denormal. Force the answer to 0.0 for those cases.
19237         EVT VT = Op.getValueType();
19238         SDLoc DL(Op);
19239         EVT CCVT = getSetCCResultType(VT);
19240         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
19241         const Function &F = DAG.getMachineFunction().getFunction();
19242         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
19243         if (Denorms.getValueAsString().equals("ieee")) {
19244           // fabs(X) < SmallestNormal ? 0.0 : Est
19245           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
19246           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
19247           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
19248           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19249           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
19250           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
19251           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
19252           AddToWorklist(Fabs.getNode());
19253           AddToWorklist(IsDenorm.getNode());
19254           AddToWorklist(Est.getNode());
19255         } else {
19256           // X == 0.0 ? 0.0 : Est
19257           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19258           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
19259           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
19260           AddToWorklist(IsZero.getNode());
19261           AddToWorklist(Est.getNode());
19262         }
19263       }
19264     }
19265     return Est;
19266   }
19267
19268   return SDValue();
19269 }
19270
19271 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19272   return buildSqrtEstimateImpl(Op, Flags, true);
19273 }
19274
19275 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19276   return buildSqrtEstimateImpl(Op, Flags, false);
19277 }
19278
19279 /// Return true if there is any possibility that the two addresses overlap.
19280 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
19281   // If they are the same then they must be aliases.
19282   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
19283
19284   // If they are both volatile then they cannot be reordered.
19285   if (Op0->isVolatile() && Op1->isVolatile()) return true;
19286
19287   // If one operation reads from invariant memory, and the other may store, they
19288   // cannot alias. These should really be checking the equivalent of mayWrite,
19289   // but it only matters for memory nodes other than load /store.
19290   if (Op0->isInvariant() && Op1->writeMem())
19291     return false;
19292
19293   if (Op1->isInvariant() && Op0->writeMem())
19294     return false;
19295
19296   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
19297   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
19298
19299   // Check for BaseIndexOffset matching.
19300   bool IsAlias;
19301   if (BaseIndexOffset::computeAliasing(
19302           BaseIndexOffset::match(Op0, DAG), NumBytes0,
19303           BaseIndexOffset::match(Op1, DAG), NumBytes1, DAG, IsAlias))
19304     return IsAlias;
19305
19306   // If we know required SrcValue1 and SrcValue2 have relatively large
19307   // alignment compared to the size and offset of the access, we may be able
19308   // to prove they do not alias. This check is conservative for now to catch
19309   // cases created by splitting vector types.
19310   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
19311   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
19312   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
19313   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
19314   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
19315       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
19316     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
19317     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
19318
19319     // There is no overlap between these relatively aligned accesses of
19320     // similar size. Return no alias.
19321     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
19322         (OffAlign1 + NumBytes1) <= OffAlign0)
19323       return false;
19324   }
19325
19326   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
19327                    ? CombinerGlobalAA
19328                    : DAG.getSubtarget().useAA();
19329 #ifndef NDEBUG
19330   if (CombinerAAOnlyFunc.getNumOccurrences() &&
19331       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
19332     UseAA = false;
19333 #endif
19334
19335   if (UseAA && AA &&
19336       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
19337     // Use alias analysis information.
19338     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
19339     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
19340     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
19341     AliasResult AAResult =
19342         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
19343                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
19344                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
19345                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
19346     if (AAResult == NoAlias)
19347       return false;
19348   }
19349
19350   // Otherwise we have to assume they alias.
19351   return true;
19352 }
19353
19354 /// Walk up chain skipping non-aliasing memory nodes,
19355 /// looking for aliasing nodes and adding them to the Aliases vector.
19356 void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
19357                                    SmallVectorImpl<SDValue> &Aliases) {
19358   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
19359   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
19360
19361   // Get alias information for node.
19362   bool IsLoad = isa<LoadSDNode>(N) && !N->isVolatile();
19363   const BaseIndexOffset LSBasePtr = BaseIndexOffset::match(N, DAG);
19364   const unsigned LSNumBytes = N->getMemoryVT().getStoreSize();
19365
19366   // Starting off.
19367   Chains.push_back(OriginalChain);
19368   unsigned Depth = 0;
19369
19370   // Look at each chain and determine if it is an alias.  If so, add it to the
19371   // aliases list.  If not, then continue up the chain looking for the next
19372   // candidate.
19373   while (!Chains.empty()) {
19374     SDValue Chain = Chains.pop_back_val();
19375
19376     // For TokenFactor nodes, look at each operand and only continue up the
19377     // chain until we reach the depth limit.
19378     //
19379     // FIXME: The depth check could be made to return the last non-aliasing
19380     // chain we found before we hit a tokenfactor rather than the original
19381     // chain.
19382     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
19383       Aliases.clear();
19384       Aliases.push_back(OriginalChain);
19385       return;
19386     }
19387
19388     // Don't bother if we've been before.
19389     if (!Visited.insert(Chain.getNode()).second)
19390       continue;
19391
19392     switch (Chain.getOpcode()) {
19393     case ISD::EntryToken:
19394       // Entry token is ideal chain operand, but handled in FindBetterChain.
19395       break;
19396
19397     case ISD::LOAD:
19398     case ISD::STORE: {
19399       // Get alias information for Chain.
19400       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
19401           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
19402
19403       // If chain is alias then stop here.
19404       if (!(IsLoad && IsOpLoad) &&
19405           isAlias(N, cast<LSBaseSDNode>(Chain.getNode()))) {
19406         Aliases.push_back(Chain);
19407       } else {
19408         // Look further up the chain.
19409         Chains.push_back(Chain.getOperand(0));
19410         ++Depth;
19411       }
19412       break;
19413     }
19414
19415     case ISD::TokenFactor:
19416       // We have to check each of the operands of the token factor for "small"
19417       // token factors, so we queue them up.  Adding the operands to the queue
19418       // (stack) in reverse order maintains the original order and increases the
19419       // likelihood that getNode will find a matching token factor (CSE.)
19420       if (Chain.getNumOperands() > 16) {
19421         Aliases.push_back(Chain);
19422         break;
19423       }
19424       for (unsigned n = Chain.getNumOperands(); n;)
19425         Chains.push_back(Chain.getOperand(--n));
19426       ++Depth;
19427       break;
19428
19429     case ISD::CopyFromReg:
19430       // Forward past CopyFromReg.
19431       Chains.push_back(Chain.getOperand(0));
19432       ++Depth;
19433       break;
19434
19435     case ISD::LIFETIME_START:
19436     case ISD::LIFETIME_END: {
19437       // We can forward past any lifetime start/end that can be proven not to
19438       // alias the memory access.
19439       const auto *Lifetime = cast<LifetimeSDNode>(Chain);
19440       if (!Lifetime->hasOffset())
19441         break; // Be conservative if we don't know the extents of the object.
19442
19443       const BaseIndexOffset LifetimePtr(Lifetime->getOperand(1), SDValue(),
19444                                         Lifetime->getOffset(), false);
19445       bool IsAlias;
19446       if (BaseIndexOffset::computeAliasing(LifetimePtr, Lifetime->getSize(),
19447                                            LSBasePtr, LSNumBytes, DAG,
19448                                            IsAlias) &&
19449           !IsAlias) {
19450         Chains.push_back(Chain.getOperand(0));
19451         ++Depth;
19452       }
19453       break;
19454     }
19455
19456     default:
19457       // For all other instructions we will just have to take what we can get.
19458       Aliases.push_back(Chain);
19459       break;
19460     }
19461   }
19462 }
19463
19464 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
19465 /// (aliasing node.)
19466 SDValue DAGCombiner::FindBetterChain(LSBaseSDNode *N, SDValue OldChain) {
19467   if (OptLevel == CodeGenOpt::None)
19468     return OldChain;
19469
19470   // Ops for replacing token factor.
19471   SmallVector<SDValue, 8> Aliases;
19472
19473   // Accumulate all the aliases to this node.
19474   GatherAllAliases(N, OldChain, Aliases);
19475
19476   // If no operands then chain to entry token.
19477   if (Aliases.size() == 0)
19478     return DAG.getEntryNode();
19479
19480   // If a single operand then chain to it.  We don't need to revisit it.
19481   if (Aliases.size() == 1)
19482     return Aliases[0];
19483
19484   // Construct a custom tailored token factor.
19485   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
19486 }
19487
namespace {
// TODO: Replace with std::monostate when we move to C++17.
//
// Empty value type with a single shared instance, Unit.  Every UnitT compares
// equal to every other UnitT.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
19494
19495 // This function tries to collect a bunch of potentially interesting
19496 // nodes to improve the chains of, all at once. This might seem
19497 // redundant, as this function gets called when visiting every store
19498 // node, so why not let the work be done on each store as it's visited?
19499 //
19500 // I believe this is mainly important because MergeConsecutiveStores
19501 // is unable to deal with merging stores of different sizes, so unless
19502 // we improve the chains of all the potential candidates up-front
19503 // before running MergeConsecutiveStores, it might only see some of
19504 // the nodes that will eventually be candidates, and then not be able
19505 // to go from a partially-merged state to the desired final
19506 // fully-merged state.
19507
19508 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
19509   SmallVector<StoreSDNode *, 8> ChainedStores;
19510   StoreSDNode *STChain = St;
19511   // Intervals records which offsets from BaseIndex have been covered. In
19512   // the common case, every store writes to the immediately previous address
19513   // space and thus merged with the previous interval at insertion time.
19514
19515   using IMap =
19516       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
19517   IMap::Allocator A;
19518   IMap Intervals(A);
19519
19520   // This holds the base pointer, index, and the offset in bytes from the base
19521   // pointer.
19522   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19523
19524   // We must have a base and an offset.
19525   if (!BasePtr.getBase().getNode())
19526     return false;
19527
19528   // Do not handle stores to undef base pointers.
19529   if (BasePtr.getBase().isUndef())
19530     return false;
19531
19532   // Add ST's interval.
19533   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
19534
19535   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
19536     // If the chain has more than one use, then we can't reorder the mem ops.
19537     if (!SDValue(Chain, 0)->hasOneUse())
19538       break;
19539     if (Chain->isVolatile() || Chain->isIndexed())
19540       break;
19541
19542     // Find the base pointer and offset for this memory node.
19543     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
19544     // Check that the base pointer is the same as the original one.
19545     int64_t Offset;
19546     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
19547       break;
19548     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
19549     // Make sure we don't overlap with other intervals by checking the ones to
19550     // the left or right before inserting.
19551     auto I = Intervals.find(Offset);
19552     // If there's a next interval, we should end before it.
19553     if (I != Intervals.end() && I.start() < (Offset + Length))
19554       break;
19555     // If there's a previous interval, we should start after it.
19556     if (I != Intervals.begin() && (--I).stop() <= Offset)
19557       break;
19558     Intervals.insert(Offset, Offset + Length, Unit);
19559
19560     ChainedStores.push_back(Chain);
19561     STChain = Chain;
19562   }
19563
19564   // If we didn't find a chained store, exit.
19565   if (ChainedStores.size() == 0)
19566     return false;
19567
19568   // Improve all chained stores (St and ChainedStores members) starting from
19569   // where the store chain ended and return single TokenFactor.
19570   SDValue NewChain = STChain->getChain();
19571   SmallVector<SDValue, 8> TFOps;
19572   for (unsigned I = ChainedStores.size(); I;) {
19573     StoreSDNode *S = ChainedStores[--I];
19574     SDValue BetterChain = FindBetterChain(S, NewChain);
19575     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
19576         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
19577     TFOps.push_back(SDValue(S, 0));
19578     ChainedStores[I] = S;
19579   }
19580
19581   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
19582   SDValue BetterChain = FindBetterChain(St, NewChain);
19583   SDValue NewST;
19584   if (St->isTruncatingStore())
19585     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
19586                               St->getBasePtr(), St->getMemoryVT(),
19587                               St->getMemOperand());
19588   else
19589     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
19590                          St->getBasePtr(), St->getMemOperand());
19591
19592   TFOps.push_back(NewST);
19593
19594   // If we improved every element of TFOps, then we've lost the dependence on
19595   // NewChain to successors of St and we need to add it back to TFOps. Do so at
19596   // the beginning to keep relative order consistent with FindBetterChains.
19597   auto hasImprovedChain = [&](SDValue ST) -> bool {
19598     return ST->getOperand(0) != NewChain;
19599   };
19600   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
19601   if (AddNewChain)
19602     TFOps.insert(TFOps.begin(), NewChain);
19603
19604   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
19605   CombineTo(St, TF);
19606
19607   AddToWorklist(STChain);
19608   // Add TF operands worklist in reverse order.
19609   for (auto I = TF->getNumOperands(); I;)
19610     AddToWorklist(TF->getOperand(--I).getNode());
19611   AddToWorklist(TF.getNode());
19612   return true;
19613 }
19614
19615 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
19616   if (OptLevel == CodeGenOpt::None)
19617     return false;
19618
19619   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19620
19621   // We must have a base and an offset.
19622   if (!BasePtr.getBase().getNode())
19623     return false;
19624
19625   // Do not handle stores to undef base pointers.
19626   if (BasePtr.getBase().isUndef())
19627     return false;
19628
19629   // Directly improve a chain of disjoint stores starting at St.
19630   if (parallelizeChainedStores(St))
19631     return true;
19632
19633   // Improve St's Chain..
19634   SDValue BetterChain = FindBetterChain(St, St->getChain());
19635   if (St->getChain() != BetterChain) {
19636     replaceStoreChain(St, BetterChain);
19637     return true;
19638   }
19639   return false;
19640 }
19641
19642 /// This is the entry point for the file.
19643 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19644                            CodeGenOpt::Level OptLevel) {
19645   /// This is the main entry point to this class.
19646   DAGCombiner(*this, AA, OptLevel).Run(Level);
19647 }