1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
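//
// For example, a combine here may fold away a redundant node such as
// (add x, 0) -> x, or merge a run of consecutive narrow stores into one
// wider store. (Illustrative examples only; the full set of combines is
// defined by the visit* routines below.)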
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/Analysis/TargetLibraryInfo.h"
35 #include "llvm/Analysis/VectorUtils.h"
36 #include "llvm/CodeGen/DAGCombine.h"
37 #include "llvm/CodeGen/ISDOpcodes.h"
38 #include "llvm/CodeGen/MachineFrameInfo.h"
39 #include "llvm/CodeGen/MachineFunction.h"
40 #include "llvm/CodeGen/MachineMemOperand.h"
41 #include "llvm/CodeGen/RuntimeLibcalls.h"
42 #include "llvm/CodeGen/SelectionDAG.h"
43 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
44 #include "llvm/CodeGen/SelectionDAGNodes.h"
45 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
46 #include "llvm/CodeGen/TargetLowering.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/TargetSubtargetInfo.h"
49 #include "llvm/CodeGen/ValueTypes.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/Constant.h"
52 #include "llvm/IR/DataLayout.h"
53 #include "llvm/IR/DerivedTypes.h"
54 #include "llvm/IR/Function.h"
55 #include "llvm/IR/LLVMContext.h"
56 #include "llvm/IR/Metadata.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CodeGen.h"
59 #include "llvm/Support/CommandLine.h"
60 #include "llvm/Support/Compiler.h"
61 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/ErrorHandling.h"
63 #include "llvm/Support/KnownBits.h"
64 #include "llvm/Support/MachineValueType.h"
65 #include "llvm/Support/MathExtras.h"
66 #include "llvm/Support/raw_ostream.h"
67 #include "llvm/Target/TargetMachine.h"
68 #include "llvm/Target/TargetOptions.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <functional>
73 #include <iterator>
74 #include <string>
75 #include <tuple>
76 #include <utility>
78 using namespace llvm;
80 #define DEBUG_TYPE "dagcombine"
82 STATISTIC(NodesCombined , "Number of dag nodes combined");
83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
85 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
86 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
87 STATISTIC(SlicedLoads, "Number of loads sliced");
88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
90 static cl::opt<bool>
91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
92 cl::desc("Enable DAG combiner's use of IR alias analysis"));
94 static cl::opt<bool>
95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
96 cl::desc("Enable DAG combiner's use of TBAA"));
98 #ifndef NDEBUG
99 static cl::opt<std::string>
100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
101 cl::desc("Only use DAG-combiner alias analysis in this"
102 " function"));
103 #endif
105 /// Hidden option to stress test load slicing, i.e., when this option
106 /// is enabled, load slicing bypasses most of its profitability guards.
107 static cl::opt<bool>
108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
109 cl::desc("Bypass the profitability model of load slicing"),
110 cl::init(false));
112 static cl::opt<bool>
113 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
114 cl::desc("DAG combiner may split indexing from loads"));
116 static cl::opt<bool>
117 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
118 cl::desc("DAG combiner enable merging multiple stores "
119 "into a wider store"));
121 static cl::opt<unsigned> TokenFactorInlineLimit(
122 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
123 cl::desc("Limit the number of operands to inline for Token Factors"));
125 static cl::opt<unsigned> StoreMergeDependenceLimit(
126 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
127 cl::desc("Limit the number of times for the same StoreNode and RootNode "
128 "to bail out in store merging dependence check"));
130 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
131 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
132 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 "sequence"));
135 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
136 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
137 cl::desc("DAG combiner enable load/<replace bytes>/store with "
138 "a narrower store"));
140 namespace {
142 class DAGCombiner {
143 SelectionDAG &DAG;
144 const TargetLowering &TLI;
145 const SelectionDAGTargetInfo *STI;
146 CombineLevel Level;
147 CodeGenOpt::Level OptLevel;
148 bool LegalDAG = false;
149 bool LegalOperations = false;
150 bool LegalTypes = false;
151 bool ForCodeSize;
152 bool DisableGenericCombines;
154 /// Worklist of all of the nodes that need to be simplified.
156 /// This must behave as a stack -- new nodes to process are pushed onto the
157 /// back and when processing we pop off of the back.
159 /// The worklist will not contain duplicates but may contain null entries
160 /// due to nodes being deleted from the underlying DAG.
161 SmallVector<SDNode *, 64> Worklist;
163 /// Mapping from an SDNode to its position on the worklist.
165 /// This is used to find and remove nodes from the worklist (by nulling
166 /// them) when they are deleted from the underlying DAG. It relies on
167 /// stable indices of nodes within the worklist.
168 DenseMap<SDNode *, unsigned> WorklistMap;
169 /// This records all nodes attempted to be added to the worklist since we
170 /// last considered a new worklist entry. Because we do not add duplicate
171 /// nodes to the worklist, this is different from the tail of the worklist.
172 SmallSetVector<SDNode *, 32> PruningList;
174 /// Set of nodes which have been combined (at least once).
176 /// This is used to allow us to reliably add any operands of a DAG node
177 /// which have not yet been combined to the worklist.
178 SmallPtrSet<SDNode *, 32> CombinedNodes;
180 /// Map from candidate StoreNode to the pair of RootNode and count.
181 /// The count is used to track how many times we have seen the StoreNode
182 /// with the same RootNode bail out in the dependence check. Once the same
183 /// pair has bailed out more times than the limit allows, we no longer
184 /// consider the StoreNode with that RootNode as a store merging
185 /// candidate.
186 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
188 // AA - Used for DAG load/store alias analysis.
189 AliasAnalysis *AA;
191 /// When an instruction is simplified, add all users of the instruction to
192 /// the worklist because they might now be simplified further.
193 void AddUsersToWorklist(SDNode *N) {
194 for (SDNode *Node : N->uses())
195 AddToWorklist(Node);
198 /// Convenient shorthand to add a node and all of its users to the worklist.
199 void AddToWorklistWithUsers(SDNode *N) {
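// Add the users first so that N itself is pushed last; if it was not
// already on the worklist it then sits at the back and will be the next
// node processed.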
200 AddUsersToWorklist(N);
201 AddToWorklist(N);
204 // Prune potentially dangling nodes. This is called after
205 // any visit to a node, but should also be called during a visit after any
206 // failed combine which may have created a DAG node.
207 void clearAddedDanglingWorklistEntries() {
208 // Check any nodes added to the worklist to see if they are prunable.
209 while (!PruningList.empty()) {
210 auto *N = PruningList.pop_back_val();
211 if (N->use_empty())
212 recursivelyDeleteUnusedNodes(N);
216 SDNode *getNextWorklistEntry() {
217 // Before we do any work, remove nodes that are not in use.
218 clearAddedDanglingWorklistEntries();
219 SDNode *N = nullptr;
220 // The Worklist holds the SDNodes in order, but it may contain null
221 // entries.
222 while (!N && !Worklist.empty()) {
223 N = Worklist.pop_back_val();
226 if (N) {
227 bool GoodWorklistEntry = WorklistMap.erase(N);
228 (void)GoodWorklistEntry;
229 assert(GoodWorklistEntry &&
230 "Found a worklist entry without a corresponding map entry!");
232 return N;
235 /// Call the node-specific routine that folds each particular type of node.
236 SDValue visit(SDNode *N);
238 public:
239 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
240 : DAG(D), TLI(D.getTargetLoweringInfo()),
241 STI(D.getSubtarget().getSelectionDAGInfo()),
242 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
243 ForCodeSize = DAG.shouldOptForSize();
244 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
246 MaximumLegalStoreInBits = 0;
247 // We use the minimum store size here, since that's all we can guarantee
248 // for the scalable vector types.
249 for (MVT VT : MVT::all_valuetypes())
250 if (EVT(VT).isSimple() && VT != MVT::Other &&
251 TLI.isTypeLegal(EVT(VT)) &&
252 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
253 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
256 void ConsiderForPruning(SDNode *N) {
257 // Mark this for potential pruning.
258 PruningList.insert(N);
261 /// Add to the worklist, making sure its instance is at the back (next to
262 /// be processed).
263 void AddToWorklist(SDNode *N) {
264 assert(N->getOpcode() != ISD::DELETED_NODE &&
265 "Deleted Node added to Worklist");
267 // Skip handle nodes as they can't usefully be combined and confuse the
268 // zero-use deletion strategy.
269 if (N->getOpcode() == ISD::HANDLENODE)
270 return;
272 ConsiderForPruning(N);
274 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
275 Worklist.push_back(N);
278 /// Remove all instances of N from the worklist.
279 void removeFromWorklist(SDNode *N) {
280 CombinedNodes.erase(N);
281 PruningList.remove(N);
282 StoreRootCountMap.erase(N);
284 auto It = WorklistMap.find(N);
285 if (It == WorklistMap.end())
286 return; // Not in the worklist.
288 // Null out the entry rather than erasing it to avoid a linear operation.
289 Worklist[It->second] = nullptr;
290 WorklistMap.erase(It);
293 void deleteAndRecombine(SDNode *N);
294 bool recursivelyDeleteUnusedNodes(SDNode *N);
296 /// Replaces all uses of the results of one DAG node with new values.
297 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 bool AddTo = true);
300 /// Replaces all uses of the results of one DAG node with new values.
301 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
302 return CombineTo(N, &Res, 1, AddTo);
305 /// Replaces all uses of the results of one DAG node with new values.
306 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
307 bool AddTo = true) {
308 SDValue To[] = { Res0, Res1 };
309 return CombineTo(N, To, 2, AddTo);
312 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
314 private:
315 unsigned MaximumLegalStoreInBits;
317 /// Check the specified integer node value to see if it can be simplified or
318 /// if things it uses can be simplified by bit propagation.
319 /// If so, return true.
320 bool SimplifyDemandedBits(SDValue Op) {
321 unsigned BitWidth = Op.getScalarValueSizeInBits();
322 APInt DemandedBits = APInt::getAllOnes(BitWidth);
323 return SimplifyDemandedBits(Op, DemandedBits);
326 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
327 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
328 KnownBits Known;
329 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
330 return false;
332 // Revisit the node.
333 AddToWorklist(Op.getNode());
335 CommitTargetLoweringOpt(TLO);
336 return true;
339 /// Check the specified vector node value to see if it can be simplified or
340 /// if things it uses can be simplified as it only uses some of the
341 /// elements. If so, return true.
342 bool SimplifyDemandedVectorElts(SDValue Op) {
343 // TODO: For now just pretend it cannot be simplified.
344 if (Op.getValueType().isScalableVector())
345 return false;
347 unsigned NumElts = Op.getValueType().getVectorNumElements();
348 APInt DemandedElts = APInt::getAllOnes(NumElts);
349 return SimplifyDemandedVectorElts(Op, DemandedElts);
352 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
353 const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
355 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
356 bool AssumeSingleUse = false);
358 bool CombineToPreIndexedLoadStore(SDNode *N);
359 bool CombineToPostIndexedLoadStore(SDNode *N);
360 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
361 bool SliceUpLoad(SDNode *N);
363 // Scalars have size 0 to distinguish from singleton vectors.
364 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
365 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
366 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
368 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// load.
371 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
372 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
373 /// \param EltNo index of the vector element to load.
374 /// \param OriginalLoad load that EVE came from to be replaced.
375 /// \returns EVE on success, SDValue() on failure.
376 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
377 SDValue EltNo,
378 LoadSDNode *OriginalLoad);
379 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
380 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
381 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
382 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
383 SDValue PromoteIntBinOp(SDValue Op);
384 SDValue PromoteIntShiftOp(SDValue Op);
385 SDValue PromoteExtend(SDValue Op);
386 bool PromoteLoad(SDValue Op);
388 /// Call the node-specific routine that knows how to fold each
389 /// particular type of node. If that doesn't do anything, try the
390 /// target-specific DAG combines.
391 SDValue combine(SDNode *N);
393 // Visitation implementation - Implement dag node combining for different
394 // node types. The semantics are as follows:
395 // Return Value:
396 // SDValue.getNode() == 0 - No change was made
397 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
398 // otherwise - N should be replaced by the returned Operand.
400 SDValue visitTokenFactor(SDNode *N);
401 SDValue visitMERGE_VALUES(SDNode *N);
402 SDValue visitADD(SDNode *N);
403 SDValue visitADDLike(SDNode *N);
404 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
405 SDValue visitSUB(SDNode *N);
406 SDValue visitADDSAT(SDNode *N);
407 SDValue visitSUBSAT(SDNode *N);
408 SDValue visitADDC(SDNode *N);
409 SDValue visitADDO(SDNode *N);
410 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
411 SDValue visitSUBC(SDNode *N);
412 SDValue visitSUBO(SDNode *N);
413 SDValue visitADDE(SDNode *N);
414 SDValue visitADDCARRY(SDNode *N);
415 SDValue visitSADDO_CARRY(SDNode *N);
416 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
417 SDValue visitSUBE(SDNode *N);
418 SDValue visitSUBCARRY(SDNode *N);
419 SDValue visitSSUBO_CARRY(SDNode *N);
420 SDValue visitMUL(SDNode *N);
421 SDValue visitMULFIX(SDNode *N);
422 SDValue useDivRem(SDNode *N);
423 SDValue visitSDIV(SDNode *N);
424 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
425 SDValue visitUDIV(SDNode *N);
426 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
427 SDValue visitREM(SDNode *N);
428 SDValue visitMULHU(SDNode *N);
429 SDValue visitMULHS(SDNode *N);
430 SDValue visitSMUL_LOHI(SDNode *N);
431 SDValue visitUMUL_LOHI(SDNode *N);
432 SDValue visitMULO(SDNode *N);
433 SDValue visitIMINMAX(SDNode *N);
434 SDValue visitAND(SDNode *N);
435 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
436 SDValue visitOR(SDNode *N);
437 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
438 SDValue visitXOR(SDNode *N);
439 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
440 SDValue visitSHL(SDNode *N);
441 SDValue visitSRA(SDNode *N);
442 SDValue visitSRL(SDNode *N);
443 SDValue visitFunnelShift(SDNode *N);
444 SDValue visitRotate(SDNode *N);
445 SDValue visitABS(SDNode *N);
446 SDValue visitBSWAP(SDNode *N);
447 SDValue visitBITREVERSE(SDNode *N);
448 SDValue visitCTLZ(SDNode *N);
449 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
450 SDValue visitCTTZ(SDNode *N);
451 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
452 SDValue visitCTPOP(SDNode *N);
453 SDValue visitSELECT(SDNode *N);
454 SDValue visitVSELECT(SDNode *N);
455 SDValue visitSELECT_CC(SDNode *N);
456 SDValue visitSETCC(SDNode *N);
457 SDValue visitSETCCCARRY(SDNode *N);
458 SDValue visitSIGN_EXTEND(SDNode *N);
459 SDValue visitZERO_EXTEND(SDNode *N);
460 SDValue visitANY_EXTEND(SDNode *N);
461 SDValue visitAssertExt(SDNode *N);
462 SDValue visitAssertAlign(SDNode *N);
463 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
464 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
465 SDValue visitTRUNCATE(SDNode *N);
466 SDValue visitBITCAST(SDNode *N);
467 SDValue visitFREEZE(SDNode *N);
468 SDValue visitBUILD_PAIR(SDNode *N);
469 SDValue visitFADD(SDNode *N);
470 SDValue visitSTRICT_FADD(SDNode *N);
471 SDValue visitFSUB(SDNode *N);
472 SDValue visitFMUL(SDNode *N);
473 SDValue visitFMA(SDNode *N);
474 SDValue visitFDIV(SDNode *N);
475 SDValue visitFREM(SDNode *N);
476 SDValue visitFSQRT(SDNode *N);
477 SDValue visitFCOPYSIGN(SDNode *N);
478 SDValue visitFPOW(SDNode *N);
479 SDValue visitSINT_TO_FP(SDNode *N);
480 SDValue visitUINT_TO_FP(SDNode *N);
481 SDValue visitFP_TO_SINT(SDNode *N);
482 SDValue visitFP_TO_UINT(SDNode *N);
483 SDValue visitFP_ROUND(SDNode *N);
484 SDValue visitFP_EXTEND(SDNode *N);
485 SDValue visitFNEG(SDNode *N);
486 SDValue visitFABS(SDNode *N);
487 SDValue visitFCEIL(SDNode *N);
488 SDValue visitFTRUNC(SDNode *N);
489 SDValue visitFFLOOR(SDNode *N);
490 SDValue visitFMinMax(SDNode *N);
491 SDValue visitBRCOND(SDNode *N);
492 SDValue visitBR_CC(SDNode *N);
493 SDValue visitLOAD(SDNode *N);
495 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
496 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
498 SDValue visitSTORE(SDNode *N);
499 SDValue visitLIFETIME_END(SDNode *N);
500 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
501 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
502 SDValue visitBUILD_VECTOR(SDNode *N);
503 SDValue visitCONCAT_VECTORS(SDNode *N);
504 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
505 SDValue visitVECTOR_SHUFFLE(SDNode *N);
506 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
507 SDValue visitINSERT_SUBVECTOR(SDNode *N);
508 SDValue visitMLOAD(SDNode *N);
509 SDValue visitMSTORE(SDNode *N);
510 SDValue visitMGATHER(SDNode *N);
511 SDValue visitMSCATTER(SDNode *N);
512 SDValue visitFP_TO_FP16(SDNode *N);
513 SDValue visitFP16_TO_FP(SDNode *N);
514 SDValue visitVECREDUCE(SDNode *N);
515 SDValue visitVPOp(SDNode *N);
517 SDValue visitFADDForFMACombine(SDNode *N);
518 SDValue visitFSUBForFMACombine(SDNode *N);
519 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
521 SDValue XformToShuffleWithZero(SDNode *N);
522 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
523 const SDLoc &DL, SDValue N0,
524 SDValue N1);
525 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
526 SDValue N1);
527 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
528 SDValue N1, SDNodeFlags Flags);
530 SDValue visitShiftByConstant(SDNode *N);
532 SDValue foldSelectOfConstants(SDNode *N);
533 SDValue foldVSelectOfConstants(SDNode *N);
534 SDValue foldBinOpIntoSelect(SDNode *BO);
535 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
536 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
537 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
538 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
539 SDValue N2, SDValue N3, ISD::CondCode CC,
540 bool NotExtCompare = false);
541 SDValue convertSelectOfFPConstantsToLoadOffset(
542 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
543 ISD::CondCode CC);
544 SDValue foldSignChangeInBitcast(SDNode *N);
545 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
546 SDValue N2, SDValue N3, ISD::CondCode CC);
547 SDValue foldSelectOfBinops(SDNode *N);
548 SDValue foldSextSetcc(SDNode *N);
549 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
550 const SDLoc &DL);
551 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
552 SDValue unfoldMaskedMerge(SDNode *N);
553 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
554 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
555 const SDLoc &DL, bool foldBooleans);
556 SDValue rebuildSetCC(SDValue N);
558 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
559 SDValue &CC, bool MatchStrict = false) const;
560 bool isOneUseSetCC(SDValue N) const;
562 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
563 unsigned HiOp);
564 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
565 SDValue CombineExtLoad(SDNode *N);
566 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
567 SDValue combineRepeatedFPDivisors(SDNode *N);
568 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
569 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
570 SDValue BuildSDIV(SDNode *N);
571 SDValue BuildSDIVPow2(SDNode *N);
572 SDValue BuildUDIV(SDNode *N);
573 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
574 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
575 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
576 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
577 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
578 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
579 SDNodeFlags Flags, bool Reciprocal);
580 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
581 SDNodeFlags Flags, bool Reciprocal);
582 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
583 bool DemandHighBits = true);
584 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
585 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
586 SDValue InnerPos, SDValue InnerNeg,
587 unsigned PosOpcode, unsigned NegOpcode,
588 const SDLoc &DL);
589 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
590 SDValue InnerPos, SDValue InnerNeg,
591 unsigned PosOpcode, unsigned NegOpcode,
592 const SDLoc &DL);
593 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
594 SDValue MatchLoadCombine(SDNode *N);
595 SDValue mergeTruncStores(StoreSDNode *N);
596 SDValue reduceLoadWidth(SDNode *N);
597 SDValue ReduceLoadOpStoreWidth(SDNode *N);
598 SDValue splitMergedValStore(StoreSDNode *ST);
599 SDValue TransformFPLoadStorePair(SDNode *N);
600 SDValue convertBuildVecZextToZext(SDNode *N);
601 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
602 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
603 SDValue reduceBuildVecToShuffle(SDNode *N);
604 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
605 ArrayRef<int> VectorMask, SDValue VecIn1,
606 SDValue VecIn2, unsigned LeftIdx,
607 bool DidSplitVec);
608 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
610 /// Walk up chain skipping non-aliasing memory nodes,
611 /// looking for aliasing nodes and adding them to the Aliases vector.
612 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
613 SmallVectorImpl<SDValue> &Aliases);
615 /// Return true if there is any possibility that the two addresses overlap.
616 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
618 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
619 /// chain (aliasing node).
620 SDValue FindBetterChain(SDNode *N, SDValue Chain);
622 /// Try to replace a store and any possibly adjacent stores on
623 /// consecutive chains with better chains. Return true only if St is
624 /// replaced.
626 /// Notice that other chains may still be replaced even if the function
627 /// returns false.
628 bool findBetterNeighborChains(StoreSDNode *St);
630 // Helper for findBetterNeighborChains. Walk up the store chain, adding
631 // additional chained stores that do not overlap and can be parallelized.
632 bool parallelizeChainedStores(StoreSDNode *St);
634 /// Holds a pointer to an LSBaseSDNode as well as information on where it
635 /// is located in a sequence of memory operations connected by a chain.
636 struct MemOpLink {
637 // Ptr to the mem node.
638 LSBaseSDNode *MemNode;
640 // Offset from the base ptr.
641 int64_t OffsetFromBase;
643 MemOpLink(LSBaseSDNode *N, int64_t Offset)
644 : MemNode(N), OffsetFromBase(Offset) {}
647 // Classify the origin of a stored value.
648 enum class StoreSource { Unknown, Constant, Extract, Load };
649 StoreSource getStoreSource(SDValue StoreVal) {
650 switch (StoreVal.getOpcode()) {
651 case ISD::Constant:
652 case ISD::ConstantFP:
653 return StoreSource::Constant;
654 case ISD::EXTRACT_VECTOR_ELT:
655 case ISD::EXTRACT_SUBVECTOR:
656 return StoreSource::Extract;
657 case ISD::LOAD:
658 return StoreSource::Load;
659 default:
660 return StoreSource::Unknown;
664 /// This is a helper function for visitMUL to check the profitability
665 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
666 /// MulNode is the original multiply, AddNode is (add x, c1),
667 /// and ConstNode is c2.
668 bool isMulAddWithConstProfitable(SDNode *MulNode,
669 SDValue &AddNode,
670 SDValue &ConstNode);
672 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
673 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
674 /// the type of the loaded value to be extended.
675 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
676 EVT LoadResultTy, EVT &ExtVT);
678 /// Helper function to calculate whether the given Load/Store can have its
679 /// width reduced to ExtVT.
680 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
681 EVT &MemVT, unsigned ShAmt = 0);
683 /// Used by BackwardsPropagateMask to find suitable loads.
684 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
685 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
686 ConstantSDNode *Mask, SDNode *&NodeToMask);
687 /// Attempt to propagate a given AND node back to load leaves so that they
688 /// can be combined into narrow loads.
689 bool BackwardsPropagateMask(SDNode *N);
691 /// Helper function for mergeConsecutiveStores which merges the component
692 /// store chains.
693 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
694 unsigned NumStores);
696 /// This is a helper function for mergeConsecutiveStores. When the source
697 /// elements of the consecutive stores are all constants or all extracted
698 /// vector elements, try to merge them into one larger store introducing
699 /// bitcasts if necessary. \return True if a merged store was created.
700 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
701 EVT MemVT, unsigned NumStores,
702 bool IsConstantSrc, bool UseVector,
703 bool UseTrunc);
705 /// This is a helper function for mergeConsecutiveStores. Stores that
706 /// potentially may be merged with St are placed in StoreNodes. RootNode is
707 /// a chain predecessor to all store candidates.
708 void getStoreMergeCandidates(StoreSDNode *St,
709 SmallVectorImpl<MemOpLink> &StoreNodes,
710 SDNode *&Root);
712 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
713 /// have indirect dependency through their operands. RootNode is the
714 /// predecessor to all stores calculated by getStoreMergeCandidates and is
715 /// used to prune the dependency check. \return True if safe to merge.
716 bool checkMergeStoreCandidatesForDependencies(
717 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
718 SDNode *RootNode);
720 /// This is a helper function for mergeConsecutiveStores. Given a list of
721 /// store candidates, find the first N that are consecutive in memory.
722 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
723 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
724 int64_t ElementSizeBytes) const;
726 /// This is a helper function for mergeConsecutiveStores. It is used for
727 /// store chains that are composed entirely of constant values.
728 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
729 unsigned NumConsecutiveStores,
730 EVT MemVT, SDNode *Root, bool AllowVectors);
732 /// This is a helper function for mergeConsecutiveStores. It is used for
733 /// store chains that are composed entirely of extracted vector elements.
734 /// When extracting multiple vector elements, try to store them in one
735 /// vector store rather than a sequence of scalar stores.
736 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
737 unsigned NumConsecutiveStores, EVT MemVT,
738 SDNode *Root);
740 /// This is a helper function for mergeConsecutiveStores. It is used for
741 /// store chains that are composed entirely of loaded values.
742 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
743 unsigned NumConsecutiveStores, EVT MemVT,
744 SDNode *Root, bool AllowVectors,
745 bool IsNonTemporalStore, bool IsNonTemporalLoad);
747 /// Merge consecutive store operations into a wide store.
748 /// This optimization uses wide integers or vectors when possible.
749 /// \return true if stores were merged.
750 bool mergeConsecutiveStores(StoreSDNode *St);
752 /// Try to transform a truncation where C is a constant:
753 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
755 /// \p N needs to be a truncation and its first operand an AND. Other
756 /// requirements are checked by the function (e.g. that trunc is
757 /// single-use); if they are not met, an empty SDValue is returned.
758 SDValue distributeTruncateThroughAnd(SDNode *N);
760 /// Helper function to determine whether the target supports the operation
761 /// given by \p Opcode for type \p VT, that is, whether the operation
762 /// is legal or custom before legalizing operations, and whether it is
763 /// legal (but not custom) after legalization.
764 bool hasOperation(unsigned Opcode, EVT VT) {
765 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
768 public:
769 /// Runs the dag combiner on all nodes in the work list
770 void Run(CombineLevel AtLevel);
772 SelectionDAG &getDAG() const { return DAG; }
774 /// Returns a type large enough to hold any valid shift amount - before type
775 /// legalization these can be huge.
776 EVT getShiftAmountTy(EVT LHSTy) {
777 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
778 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
781 /// This method returns true if we are running before type legalization or
782 /// if the specified VT is legal.
783 bool isTypeLegal(const EVT &VT) {
784 if (!LegalTypes) return true;
785 return TLI.isTypeLegal(VT);
788 /// Convenience wrapper around TargetLowering::getSetCCResultType
789 EVT getSetCCResultType(EVT VT) const {
790 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
793 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
794 SDValue OrigLoad, SDValue ExtLoad,
795 ISD::NodeType ExtType);
798 /// This class is a DAGUpdateListener that removes any deleted
799 /// nodes from the worklist.
800 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
801 DAGCombiner &DC;
803 public:
804 explicit WorklistRemover(DAGCombiner &dc)
805 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
807 void NodeDeleted(SDNode *N, SDNode *E) override {
808 DC.removeFromWorklist(N);
812 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
813 DAGCombiner &DC;
815 public:
816 explicit WorklistInserter(DAGCombiner &dc)
817 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
819 // FIXME: Ideally we could add N to the worklist, but this causes exponential
820 // compile time costs in large DAGs, e.g. Halide.
821 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
824 } // end anonymous namespace
826 //===----------------------------------------------------------------------===//
827 // TargetLowering::DAGCombinerInfo implementation
828 //===----------------------------------------------------------------------===//
830 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
831 ((DAGCombiner*)DC)->AddToWorklist(N);
834 SDValue TargetLowering::DAGCombinerInfo::
835 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
836 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
839 SDValue TargetLowering::DAGCombinerInfo::
840 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
841 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
844 SDValue TargetLowering::DAGCombinerInfo::
845 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
846 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
849 bool TargetLowering::DAGCombinerInfo::
850 recursivelyDeleteUnusedNodes(SDNode *N) {
851 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
854 void TargetLowering::DAGCombinerInfo::
855 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
856 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
859 //===----------------------------------------------------------------------===//
860 // Helper Functions
861 //===----------------------------------------------------------------------===//
863 void DAGCombiner::deleteAndRecombine(SDNode *N) {
864 removeFromWorklist(N);
866 // If the operands of this node are only used by the node, they will now be
867 // dead. Make sure to re-visit them and recursively delete dead nodes.
868 for (const SDValue &Op : N->ops())
869 // For an operand generating multiple values, one of the values may
870 // become dead allowing further simplification (e.g. split index
871 // arithmetic from an indexed load).
872 if (Op->hasOneUse() || Op->getNumValues() > 1)
873 AddToWorklist(Op.getNode());
875 DAG.DeleteNode(N);
878 // APInts must be the same size for most operations; this helper
879 // function zero-extends the shorter of the pair so that they match.
880 // We provide an Offset so that we can create bitwidths that won't overflow.
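// For example, an i8 LHS and an i16 RHS are both widened to 16 bits; with
// Offset == 1 they would be widened to 17 bits, leaving headroom for a
// following arithmetic step.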
881 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
882 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
883 LHS = LHS.zextOrSelf(Bits);
884 RHS = RHS.zextOrSelf(Bits);
887 // Return true if this node is a setcc, or is a select_cc
888 // that selects between the target values used for true and false, making it
889 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
890 // the appropriate nodes based on the type of node we are checking. This
891 // simplifies life a bit for the callers.
892 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
893 SDValue &CC, bool MatchStrict) const {
894 if (N.getOpcode() == ISD::SETCC) {
895 LHS = N.getOperand(0);
896 RHS = N.getOperand(1);
897 CC = N.getOperand(2);
898 return true;
901 if (MatchStrict &&
902 (N.getOpcode() == ISD::STRICT_FSETCC ||
903 N.getOpcode() == ISD::STRICT_FSETCCS)) {
904 LHS = N.getOperand(1);
905 RHS = N.getOperand(2);
906 CC = N.getOperand(3);
907 return true;
910 if (N.getOpcode() != ISD::SELECT_CC ||
911 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
912 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
913 return false;
915 if (TLI.getBooleanContents(N.getValueType()) ==
916 TargetLowering::UndefinedBooleanContent)
917 return false;
919 LHS = N.getOperand(0);
920 RHS = N.getOperand(1);
921 CC = N.getOperand(4);
922 return true;
925 /// Return true if this is a SetCC-equivalent operation with only one use.
926 /// If this is true, it allows the users to invert the operation for free when
927 /// it is profitable to do so.
928 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
929 SDValue N0, N1, N2;
930 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
931 return true;
932 return false;
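// Returns true if \p N is a constant splat whose value is exactly the
// all-ones mask for \p ScalarTy, e.g. a splat of 0xFFFF for MVT::i16.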
935 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
936 if (!ScalarTy.isSimple())
937 return false;
939 uint64_t MaskForTy = 0ULL;
940 switch (ScalarTy.getSimpleVT().SimpleTy) {
941 case MVT::i8:
942 MaskForTy = 0xFFULL;
943 break;
944 case MVT::i16:
945 MaskForTy = 0xFFFFULL;
946 break;
947 case MVT::i32:
948 MaskForTy = 0xFFFFFFFFULL;
949 break;
950 default:
951 return false;
952 break;
955 APInt Val;
956 if (ISD::isConstantSplatVector(N, Val))
957 return Val.getLimitedValue() == MaskForTy;
959 return false;
962 // Determines if it is a constant integer or a splat/build vector of constant
963 // integers (and undefs).
964 // Do not permit build vector implicit truncation.
965 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
966 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
967 return !(Const->isOpaque() && NoOpaques);
968 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
969 return false;
970 unsigned BitWidth = N.getScalarValueSizeInBits();
971 for (const SDValue &Op : N->op_values()) {
972 if (Op.isUndef())
973 continue;
974 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
975 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
976 (Const->isOpaque() && NoOpaques))
977 return false;
979 return true;
982 // Determines if a BUILD_VECTOR is composed of all constants, possibly mixed
983 // with undefs.
984 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
985 if (V.getOpcode() != ISD::BUILD_VECTOR)
986 return false;
987 return isConstantOrConstantVector(V, NoOpaques) ||
988 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
991 // Determine if the indexing may be split from this indexed load, i.e. its
// index is not an opaque target constant.
992 static bool canSplitIdx(LoadSDNode *LD) {
993 return MaySplitLoadIndex &&
994 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
995 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
998 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
999 const SDLoc &DL,
1000 SDValue N0,
1001 SDValue N1) {
1002 // Currently this only tries to ensure we don't undo the GEP splits done by
1003 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1004 // we check if the following transformation would be problematic:
1005 // (load/store (add, (add, x, offset1), offset2)) ->
1006 // (load/store (add, x, offset1+offset2)).
1008 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1009 return false;
1011 if (N0.hasOneUse())
1012 return false;
1014 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1015 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1016 if (!C1 || !C2)
1017 return false;
1019 const APInt &C1APIntVal = C1->getAPIntValue();
1020 const APInt &C2APIntVal = C2->getAPIntValue();
1021 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1022 return false;
1024 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1025 if (CombinedValueIntVal.getBitWidth() > 64)
1026 return false;
1027 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1029 for (SDNode *Node : N0->uses()) {
1030 auto LoadStore = dyn_cast<MemSDNode>(Node);
1031 if (LoadStore) {
1032 // Is x[offset2] already not a legal addressing mode? If so then
1033 // reassociating the constants breaks nothing (we test offset2 because
1034 // that's the one we hope to fold into the load or store).
1035 TargetLoweringBase::AddrMode AM;
1036 AM.HasBaseReg = true;
1037 AM.BaseOffs = C2APIntVal.getSExtValue();
1038 EVT VT = LoadStore->getMemoryVT();
1039 unsigned AS = LoadStore->getAddressSpace();
1040 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1041 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1042 continue;
1044 // Would x[offset1+offset2] still be a legal addressing mode?
1045 AM.BaseOffs = CombinedValue;
1046 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1047 return true;
1051 return false;
1054 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1055 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1056 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1057 SDValue N0, SDValue N1) {
1058 EVT VT = N0.getValueType();
1060 if (N0.getOpcode() != Opc)
1061 return SDValue();
1063 SDValue N00 = N0.getOperand(0);
1064 SDValue N01 = N0.getOperand(1);
1066 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1067 if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1068 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1069 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1070 return DAG.getNode(Opc, DL, VT, N00, OpNode);
1071 return SDValue();
1073 if (N0.hasOneUse()) {
1074 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075 // iff (op x, c1) has one use
1076 if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
1077 return DAG.getNode(Opc, DL, VT, OpNode, N01);
1078 return SDValue();
1081 return SDValue();
1084 // Try to reassociate commutative binops.
1085 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086 SDValue N1, SDNodeFlags Flags) {
1087 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089 // Floating-point reassociation is not allowed without loose FP math.
1090 if (N0.getValueType().isFloatingPoint() ||
1091 N1.getValueType().isFloatingPoint())
1092 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093 return SDValue();
1095 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096 return Combined;
1097 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098 return Combined;
1099 return SDValue();
1102 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103 bool AddTo) {
1104 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105 ++NodesCombined;
1106 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107 To[0].getNode()->dump(&DAG);
1108 dbgs() << " and " << NumTo - 1 << " other values\n");
1109 for (unsigned i = 0, e = NumTo; i != e; ++i)
1110 assert((!To[i].getNode() ||
1111 N->getValueType(i) == To[i].getValueType()) &&
1112 "Cannot combine value to value of different type!");
1114 WorklistRemover DeadNodes(*this);
1115 DAG.ReplaceAllUsesWith(N, To);
1116 if (AddTo) {
1117 // Push the new nodes and any users onto the worklist
1118 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119 if (To[i].getNode()) {
1120 AddToWorklist(To[i].getNode());
1121 AddUsersToWorklist(To[i].getNode());
1126 // Finally, if the node is now dead, remove it from the graph. The node
1127 // may not be dead if the replacement process recursively simplified to
1128 // something else needing this node.
1129 if (N->use_empty())
1130 deleteAndRecombine(N);
1131 return SDValue(N, 0);
1134 void DAGCombiner::
1135 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136 // Replace the old value with the new one.
1137 ++NodesCombined;
1138 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140 dbgs() << '\n');
1142 // Replace all uses. If any nodes become isomorphic to other nodes and
1143 // are deleted, make sure to remove them from our worklist.
1144 WorklistRemover DeadNodes(*this);
1145 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147 // Push the new node and any (possibly new) users onto the worklist.
1148 AddToWorklistWithUsers(TLO.New.getNode());
1150 // Finally, if the node is now dead, remove it from the graph. The node
1151 // may not be dead if the replacement process recursively simplified to
1152 // something else needing this node.
1153 if (TLO.Old.getNode()->use_empty())
1154 deleteAndRecombine(TLO.Old.getNode());
1157 /// Check the specified integer node value to see if it can be simplified or if
1158 /// things it uses can be simplified by bit propagation. If so, return true.
1159 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160 const APInt &DemandedElts,
1161 bool AssumeSingleUse) {
1162 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163 KnownBits Known;
1164 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165 AssumeSingleUse))
1166 return false;
1168 // Revisit the node.
1169 AddToWorklist(Op.getNode());
1171 CommitTargetLoweringOpt(TLO);
1172 return true;
1175 /// Check the specified vector node value to see if it can be simplified or
1176 /// if things it uses can be simplified as it only uses some of the elements.
1177 /// If so, return true.
1178 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179 const APInt &DemandedElts,
1180 bool AssumeSingleUse) {
1181 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182 APInt KnownUndef, KnownZero;
1183 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184 TLO, 0, AssumeSingleUse))
1185 return false;
1187 // Revisit the node.
1188 AddToWorklist(Op.getNode());
1190 CommitTargetLoweringOpt(TLO);
1191 return true;
1194 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195 SDLoc DL(Load);
1196 EVT VT = Load->getValueType(0);
1197 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201 WorklistRemover DeadNodes(*this);
1202 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204 deleteAndRecombine(Load);
1205 AddToWorklist(Trunc.getNode());
1208 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209 Replace = false;
1210 SDLoc DL(Op);
1211 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212 LoadSDNode *LD = cast<LoadSDNode>(Op);
1213 EVT MemVT = LD->getMemoryVT();
1214 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215 : LD->getExtensionType();
1216 Replace = true;
1217 return DAG.getExtLoad(ExtType, DL, PVT,
1218 LD->getChain(), LD->getBasePtr(),
1219 MemVT, LD->getMemOperand());
1222 unsigned Opc = Op.getOpcode();
1223 switch (Opc) {
1224 default: break;
1225 case ISD::AssertSext:
1226 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228 break;
1229 case ISD::AssertZext:
1230 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232 break;
1233 case ISD::Constant: {
1234 unsigned ExtOpc =
1235 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236 return DAG.getNode(ExtOpc, DL, PVT, Op);
1240 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241 return SDValue();
1242 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1245 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247 return SDValue();
1248 EVT OldVT = Op.getValueType();
1249 SDLoc DL(Op);
1250 bool Replace = false;
1251 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252 if (!NewOp.getNode())
1253 return SDValue();
1254 AddToWorklist(NewOp.getNode());
1256 if (Replace)
1257 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259 DAG.getValueType(OldVT));
1262 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263 EVT OldVT = Op.getValueType();
1264 SDLoc DL(Op);
1265 bool Replace = false;
1266 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267 if (!NewOp.getNode())
1268 return SDValue();
1269 AddToWorklist(NewOp.getNode());
1271 if (Replace)
1272 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1276 /// Promote the specified integer binary operation if the target indicates it is
1277 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1278 /// i32 since i16 instructions are longer.
1279 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280 if (!LegalOperations)
1281 return SDValue();
1283 EVT VT = Op.getValueType();
1284 if (VT.isVector() || !VT.isInteger())
1285 return SDValue();
1287 // If operation type is 'undesirable', e.g. i16 on x86, consider
1288 // promoting it.
1289 unsigned Opc = Op.getOpcode();
1290 if (TLI.isTypeDesirableForOp(Opc, VT))
1291 return SDValue();
1293 EVT PVT = VT;
1294 // Consult target whether it is a good idea to promote this operation and
1295 // what's the right type to promote it to.
1296 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297 assert(PVT != VT && "Don't know what type to promote to!");
1299 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301 bool Replace0 = false;
1302 SDValue N0 = Op.getOperand(0);
1303 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305 bool Replace1 = false;
1306 SDValue N1 = Op.getOperand(1);
1307 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308 SDLoc DL(Op);
1310 SDValue RV =
1311 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313 // We are always replacing N0/N1's use in N and only need additional
1314 // replacements if there are additional uses.
1315 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316 // (SDValue) here because the node may reference multiple values
1317 // (for example, the chain value of a load node).
1318 Replace0 &= !N0->hasOneUse();
1319 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321 // Combine Op here so it is preserved past replacements.
1322 CombineTo(Op.getNode(), RV);
1324 // If operands have a use ordering, make sure we deal with
1325 // predecessor first.
1326 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327 std::swap(N0, N1);
1328 std::swap(NN0, NN1);
1331 if (Replace0) {
1332 AddToWorklist(NN0.getNode());
1333 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335 if (Replace1) {
1336 AddToWorklist(NN1.getNode());
1337 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339 return Op;
1341 return SDValue();
1344 /// Promote the specified integer shift operation if the target indicates it is
1345 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1346 /// i32 since i16 instructions are longer.
1347 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348 if (!LegalOperations)
1349 return SDValue();
1351 EVT VT = Op.getValueType();
1352 if (VT.isVector() || !VT.isInteger())
1353 return SDValue();
1355 // If operation type is 'undesirable', e.g. i16 on x86, consider
1356 // promoting it.
1357 unsigned Opc = Op.getOpcode();
1358 if (TLI.isTypeDesirableForOp(Opc, VT))
1359 return SDValue();
1361 EVT PVT = VT;
1362 // Consult target whether it is a good idea to promote this operation and
1363 // what's the right type to promote it to.
1364 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365 assert(PVT != VT && "Don't know what type to promote to!");
1367 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369 bool Replace = false;
1370 SDValue N0 = Op.getOperand(0);
1371 SDValue N1 = Op.getOperand(1);
1372 if (Opc == ISD::SRA)
1373 N0 = SExtPromoteOperand(N0, PVT);
1374 else if (Opc == ISD::SRL)
1375 N0 = ZExtPromoteOperand(N0, PVT);
1376 else
1377 N0 = PromoteOperand(N0, PVT, Replace);
1379 if (!N0.getNode())
1380 return SDValue();
1382 SDLoc DL(Op);
1383 SDValue RV =
1384 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386 if (Replace)
1387 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389 // Deal with Op being deleted.
1390 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391 return RV;
1393 return SDValue();
1396 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397 if (!LegalOperations)
1398 return SDValue();
1400 EVT VT = Op.getValueType();
1401 if (VT.isVector() || !VT.isInteger())
1402 return SDValue();
1404 // If operation type is 'undesirable', e.g. i16 on x86, consider
1405 // promoting it.
1406 unsigned Opc = Op.getOpcode();
1407 if (TLI.isTypeDesirableForOp(Opc, VT))
1408 return SDValue();
1410 EVT PVT = VT;
1411 // Consult target whether it is a good idea to promote this operation and
1412 // what's the right type to promote it to.
1413 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414 assert(PVT != VT && "Don't know what type to promote to!");
1415 // fold (aext (aext x)) -> (aext x)
1416 // fold (aext (zext x)) -> (zext x)
1417 // fold (aext (sext x)) -> (sext x)
1418 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421 return SDValue();
1424 bool DAGCombiner::PromoteLoad(SDValue Op) {
1425 if (!LegalOperations)
1426 return false;
1428 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429 return false;
1431 EVT VT = Op.getValueType();
1432 if (VT.isVector() || !VT.isInteger())
1433 return false;
1435 // If operation type is 'undesirable', e.g. i16 on x86, consider
1436 // promoting it.
1437 unsigned Opc = Op.getOpcode();
1438 if (TLI.isTypeDesirableForOp(Opc, VT))
1439 return false;
1441 EVT PVT = VT;
1442 // Consult target whether it is a good idea to promote this operation and
1443 // what's the right type to promote it to.
1444 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445 assert(PVT != VT && "Don't know what type to promote to!");
1447 SDLoc DL(Op);
1448 SDNode *N = Op.getNode();
1449 LoadSDNode *LD = cast<LoadSDNode>(N);
1450 EVT MemVT = LD->getMemoryVT();
1451 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1452 : LD->getExtensionType();
1453 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454 LD->getChain(), LD->getBasePtr(),
1455 MemVT, LD->getMemOperand());
1456 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459 Result.getNode()->dump(&DAG); dbgs() << '\n');
1460 WorklistRemover DeadNodes(*this);
1461 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463 deleteAndRecombine(N);
1464 AddToWorklist(Result.getNode());
1465 return true;
1467 return false;
1470 /// Recursively delete a node which has no uses and any operands for
1471 /// which it is the only use.
1473 /// Note that this both deletes the nodes and removes them from the worklist.
1474 /// It also adds any nodes that have had a user deleted to the worklist, as
1475 /// they may now have only one use and be subject to other combines.
1476 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477 if (!N->use_empty())
1478 return false;
1480 SmallSetVector<SDNode *, 16> Nodes;
1481 Nodes.insert(N);
1482 do {
1483 N = Nodes.pop_back_val();
1484 if (!N)
1485 continue;
1487 if (N->use_empty()) {
1488 for (const SDValue &ChildN : N->op_values())
1489 Nodes.insert(ChildN.getNode());
1491 removeFromWorklist(N);
1492 DAG.DeleteNode(N);
1493 } else {
1494 AddToWorklist(N);
1496 } while (!Nodes.empty());
1497 return true;
1500 //===----------------------------------------------------------------------===//
1501 // Main DAG Combiner implementation
1502 //===----------------------------------------------------------------------===//
1504 void DAGCombiner::Run(CombineLevel AtLevel) {
1505 // Set the instance variables so that the various visit routines may use them.
1506 Level = AtLevel;
1507 LegalDAG = Level >= AfterLegalizeDAG;
1508 LegalOperations = Level >= AfterLegalizeVectorOps;
1509 LegalTypes = Level >= AfterLegalizeTypes;
1511 WorklistInserter AddNodes(*this);
1513 // Add all the dag nodes to the worklist.
1514 for (SDNode &Node : DAG.allnodes())
1515 AddToWorklist(&Node);
1517 // Create a dummy node (which is not added to allnodes), that adds a reference
1518 // to the root node, preventing it from being deleted, and tracking any
1519 // changes of the root.
1520 HandleSDNode Dummy(DAG.getRoot());
1522 // While we have a valid worklist entry node, try to combine it.
1523 while (SDNode *N = getNextWorklistEntry()) {
1524 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1525 // N is deleted from the DAG, since they too may now be dead or may have a
1526 // reduced number of uses, allowing other xforms.
1527 if (recursivelyDeleteUnusedNodes(N))
1528 continue;
1530 WorklistRemover DeadNodes(*this);
1532 // If this combine is running after legalizing the DAG, re-legalize any
1533 // nodes pulled off the worklist.
1534 if (LegalDAG) {
1535 SmallSetVector<SDNode *, 16> UpdatedNodes;
1536 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1538 for (SDNode *LN : UpdatedNodes)
1539 AddToWorklistWithUsers(LN);
1541 if (!NIsValid)
1542 continue;
1545 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1547 // Add any operands of the new node which have not yet been combined to the
1548 // worklist as well. Because the worklist uniques things already, this
1549 // won't repeatedly process the same operand.
1550 CombinedNodes.insert(N);
1551 for (const SDValue &ChildN : N->op_values())
1552 if (!CombinedNodes.count(ChildN.getNode()))
1553 AddToWorklist(ChildN.getNode());
1555 SDValue RV = combine(N);
1557 if (!RV.getNode())
1558 continue;
1560 ++NodesCombined;
1562 // If we get back the same node we passed in, rather than a new node or
1563 // zero, we know that the node must have defined multiple values and
1564 // CombineTo was used. Since CombineTo takes care of the worklist
1565 // mechanics for us, we have no work to do in this case.
1566 if (RV.getNode() == N)
1567 continue;
1569 assert(N->getOpcode() != ISD::DELETED_NODE &&
1570 RV.getOpcode() != ISD::DELETED_NODE &&
1571 "Node was deleted but visit returned new node!");
1573 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1575 if (N->getNumValues() == RV.getNode()->getNumValues())
1576 DAG.ReplaceAllUsesWith(N, RV.getNode());
1577 else {
1578 assert(N->getValueType(0) == RV.getValueType() &&
1579 N->getNumValues() == 1 && "Type mismatch");
1580 DAG.ReplaceAllUsesWith(N, &RV);
1583 // Push the new node and any users onto the worklist. Omit this if the
1584 // new node is the EntryToken (e.g. if a store managed to get optimized
1585 // out), because re-visiting the EntryToken and its users will not uncover
1586 // any additional opportunities, but there may be a large number of such
1587 // users, potentially causing compile time explosion.
1588 if (RV.getOpcode() != ISD::EntryToken) {
1589 AddToWorklist(RV.getNode());
1590 AddUsersToWorklist(RV.getNode());
1593 // Finally, if the node is now dead, remove it from the graph. The node
1594 // may not be dead if the replacement process recursively simplified to
1595 // something else needing this node. This will also take care of adding any
1596 // operands which have lost a user to the worklist.
1597 recursivelyDeleteUnusedNodes(N);
1600 // If the root changed (e.g. it was a dead load), update the root.
1601 DAG.setRoot(Dummy.getValue());
1602 DAG.RemoveDeadNodes();
1605 SDValue DAGCombiner::visit(SDNode *N) {
1606 switch (N->getOpcode()) {
1607 default: break;
1608 case ISD::TokenFactor: return visitTokenFactor(N);
1609 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1610 case ISD::ADD: return visitADD(N);
1611 case ISD::SUB: return visitSUB(N);
1612 case ISD::SADDSAT:
1613 case ISD::UADDSAT: return visitADDSAT(N);
1614 case ISD::SSUBSAT:
1615 case ISD::USUBSAT: return visitSUBSAT(N);
1616 case ISD::ADDC: return visitADDC(N);
1617 case ISD::SADDO:
1618 case ISD::UADDO: return visitADDO(N);
1619 case ISD::SUBC: return visitSUBC(N);
1620 case ISD::SSUBO:
1621 case ISD::USUBO: return visitSUBO(N);
1622 case ISD::ADDE: return visitADDE(N);
1623 case ISD::ADDCARRY: return visitADDCARRY(N);
1624 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1625 case ISD::SUBE: return visitSUBE(N);
1626 case ISD::SUBCARRY: return visitSUBCARRY(N);
1627 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1628 case ISD::SMULFIX:
1629 case ISD::SMULFIXSAT:
1630 case ISD::UMULFIX:
1631 case ISD::UMULFIXSAT: return visitMULFIX(N);
1632 case ISD::MUL: return visitMUL(N);
1633 case ISD::SDIV: return visitSDIV(N);
1634 case ISD::UDIV: return visitUDIV(N);
1635 case ISD::SREM:
1636 case ISD::UREM: return visitREM(N);
1637 case ISD::MULHU: return visitMULHU(N);
1638 case ISD::MULHS: return visitMULHS(N);
1639 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1640 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1641 case ISD::SMULO:
1642 case ISD::UMULO: return visitMULO(N);
1643 case ISD::SMIN:
1644 case ISD::SMAX:
1645 case ISD::UMIN:
1646 case ISD::UMAX: return visitIMINMAX(N);
1647 case ISD::AND: return visitAND(N);
1648 case ISD::OR: return visitOR(N);
1649 case ISD::XOR: return visitXOR(N);
1650 case ISD::SHL: return visitSHL(N);
1651 case ISD::SRA: return visitSRA(N);
1652 case ISD::SRL: return visitSRL(N);
1653 case ISD::ROTR:
1654 case ISD::ROTL: return visitRotate(N);
1655 case ISD::FSHL:
1656 case ISD::FSHR: return visitFunnelShift(N);
1657 case ISD::ABS: return visitABS(N);
1658 case ISD::BSWAP: return visitBSWAP(N);
1659 case ISD::BITREVERSE: return visitBITREVERSE(N);
1660 case ISD::CTLZ: return visitCTLZ(N);
1661 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1662 case ISD::CTTZ: return visitCTTZ(N);
1663 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1664 case ISD::CTPOP: return visitCTPOP(N);
1665 case ISD::SELECT: return visitSELECT(N);
1666 case ISD::VSELECT: return visitVSELECT(N);
1667 case ISD::SELECT_CC: return visitSELECT_CC(N);
1668 case ISD::SETCC: return visitSETCC(N);
1669 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1670 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1671 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1672 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1673 case ISD::AssertSext:
1674 case ISD::AssertZext: return visitAssertExt(N);
1675 case ISD::AssertAlign: return visitAssertAlign(N);
1676 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1677 case ISD::SIGN_EXTEND_VECTOR_INREG:
1678 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1679 case ISD::TRUNCATE: return visitTRUNCATE(N);
1680 case ISD::BITCAST: return visitBITCAST(N);
1681 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1682 case ISD::FADD: return visitFADD(N);
1683 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1684 case ISD::FSUB: return visitFSUB(N);
1685 case ISD::FMUL: return visitFMUL(N);
1686 case ISD::FMA: return visitFMA(N);
1687 case ISD::FDIV: return visitFDIV(N);
1688 case ISD::FREM: return visitFREM(N);
1689 case ISD::FSQRT: return visitFSQRT(N);
1690 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1691 case ISD::FPOW: return visitFPOW(N);
1692 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1693 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1694 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1695 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1696 case ISD::FP_ROUND: return visitFP_ROUND(N);
1697 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1698 case ISD::FNEG: return visitFNEG(N);
1699 case ISD::FABS: return visitFABS(N);
1700 case ISD::FFLOOR: return visitFFLOOR(N);
1701 case ISD::FMINNUM:
1702 case ISD::FMAXNUM:
1703 case ISD::FMINIMUM:
1704 case ISD::FMAXIMUM: return visitFMinMax(N);
1705 case ISD::FCEIL: return visitFCEIL(N);
1706 case ISD::FTRUNC: return visitFTRUNC(N);
1707 case ISD::BRCOND: return visitBRCOND(N);
1708 case ISD::BR_CC: return visitBR_CC(N);
1709 case ISD::LOAD: return visitLOAD(N);
1710 case ISD::STORE: return visitSTORE(N);
1711 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1712 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1714 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1715 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1716 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1717 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1718 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1719 case ISD::MGATHER: return visitMGATHER(N);
1720 case ISD::MLOAD: return visitMLOAD(N);
1721 case ISD::MSCATTER: return visitMSCATTER(N);
1722 case ISD::MSTORE: return visitMSTORE(N);
1723 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1724 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1725 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1726 case ISD::FREEZE: return visitFREEZE(N);
1727 case ISD::VECREDUCE_FADD:
1728 case ISD::VECREDUCE_FMUL:
1729 case ISD::VECREDUCE_ADD:
1730 case ISD::VECREDUCE_MUL:
1731 case ISD::VECREDUCE_AND:
1732 case ISD::VECREDUCE_OR:
1733 case ISD::VECREDUCE_XOR:
1734 case ISD::VECREDUCE_SMAX:
1735 case ISD::VECREDUCE_SMIN:
1736 case ISD::VECREDUCE_UMAX:
1737 case ISD::VECREDUCE_UMIN:
1738 case ISD::VECREDUCE_FMAX:
1739 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1740 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1741 #include "llvm/IR/VPIntrinsics.def"
1742 return visitVPOp(N);
1744 return SDValue();
1747 SDValue DAGCombiner::combine(SDNode *N) {
1748 SDValue RV;
1749 if (!DisableGenericCombines)
1750 RV = visit(N);
1752 // If nothing happened, try a target-specific DAG combine.
1753 if (!RV.getNode()) {
1754 assert(N->getOpcode() != ISD::DELETED_NODE &&
1755 "Node was deleted but visit returned NULL!");
1757 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1758 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1760 // Expose the DAG combiner to the target combiner impls.
1761 TargetLowering::DAGCombinerInfo
1762 DagCombineInfo(DAG, Level, false, this);
1764 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1768 // If still nothing happened, try promoting the operation.
1769 if (!RV.getNode()) {
1770 switch (N->getOpcode()) {
1771 default: break;
1772 case ISD::ADD:
1773 case ISD::SUB:
1774 case ISD::MUL:
1775 case ISD::AND:
1776 case ISD::OR:
1777 case ISD::XOR:
1778 RV = PromoteIntBinOp(SDValue(N, 0));
1779 break;
1780 case ISD::SHL:
1781 case ISD::SRA:
1782 case ISD::SRL:
1783 RV = PromoteIntShiftOp(SDValue(N, 0));
1784 break;
1785 case ISD::SIGN_EXTEND:
1786 case ISD::ZERO_EXTEND:
1787 case ISD::ANY_EXTEND:
1788 RV = PromoteExtend(SDValue(N, 0));
1789 break;
1790 case ISD::LOAD:
1791 if (PromoteLoad(SDValue(N, 0)))
1792 RV = SDValue(N, 0);
1793 break;
1797 // If N is a commutative binary node, try to eliminate it if the commuted
1798 // version is already present in the DAG.
1799 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1800 N->getNumValues() == 1) {
1801 SDValue N0 = N->getOperand(0);
1802 SDValue N1 = N->getOperand(1);
1804 // Constant operands are canonicalized to RHS.
1805 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1806 SDValue Ops[] = {N1, N0};
1807 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1808 N->getFlags());
1809 if (CSENode)
1810 return SDValue(CSENode, 0);
1814 return RV;
1817 /// Given a node, return its input chain if it has one, otherwise return a null
1818 /// sd operand.
1819 static SDValue getInputChainForNode(SDNode *N) {
1820 if (unsigned NumOps = N->getNumOperands()) {
1821 if (N->getOperand(0).getValueType() == MVT::Other)
1822 return N->getOperand(0);
1823 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1824 return N->getOperand(NumOps-1);
1825 for (unsigned i = 1; i < NumOps-1; ++i)
1826 if (N->getOperand(i).getValueType() == MVT::Other)
1827 return N->getOperand(i);
1829 return SDValue();
1832 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1833 // If N has two operands, where one has an input chain equal to the other,
1834 // the 'other' chain is redundant.
1835 if (N->getNumOperands() == 2) {
1836 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1837 return N->getOperand(0);
1838 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1839 return N->getOperand(1);
1842 // Don't simplify token factors if optnone.
1843 if (OptLevel == CodeGenOpt::None)
1844 return SDValue();
1846 // Don't simplify the token factor if the node itself has too many operands.
1847 if (N->getNumOperands() > TokenFactorInlineLimit)
1848 return SDValue();
1850 // If the sole user is a token factor, we should make sure we have a
1851 // chance to merge them together. This prevents TF chains from inhibiting
1852 // optimizations.
1853 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1854 AddToWorklist(*(N->use_begin()));
1856 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1857 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1858 SmallPtrSet<SDNode*, 16> SeenOps;
1859 bool Changed = false; // If we should replace this token factor.
1861 // Start out with this token factor.
1862 TFs.push_back(N);
1864 // Iterate through token factors. The TFs list grows when new token factors
1865 // are encountered.
1866 for (unsigned i = 0; i < TFs.size(); ++i) {
1867 // Limit number of nodes to inline, to avoid quadratic compile times.
1868 // We have to add the outstanding Token Factors to Ops, otherwise we might
1869 // drop Ops from the resulting Token Factors.
1870 if (Ops.size() > TokenFactorInlineLimit) {
1871 for (unsigned j = i; j < TFs.size(); j++)
1872 Ops.emplace_back(TFs[j], 0);
1873 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1874 // combiner worklist later.
1875 TFs.resize(i);
1876 break;
1879 SDNode *TF = TFs[i];
1880 // Check each of the operands.
1881 for (const SDValue &Op : TF->op_values()) {
1882 switch (Op.getOpcode()) {
1883 case ISD::EntryToken:
1884 // Entry tokens don't need to be added to the list. They are
1885 // redundant.
1886 Changed = true;
1887 break;
1889 case ISD::TokenFactor:
1890 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1891 // Queue up for processing.
1892 TFs.push_back(Op.getNode());
1893 Changed = true;
1894 break;
1896 LLVM_FALLTHROUGH;
1898 default:
1899 // Only add if it isn't already in the list.
1900 if (SeenOps.insert(Op.getNode()).second)
1901 Ops.push_back(Op);
1902 else
1903 Changed = true;
1904 break;
1909 // Re-visit inlined Token Factors, to clean them up in case they have been
1910 // removed. Skip the first Token Factor, as this is the current node.
1911 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1912 AddToWorklist(TFs[i]);
1914 // Remove nodes that are chained to another node in the list. Do so
1915 // by walking up chains breadth-first, stopping when we've seen
1916 // another operand. In general we must climb to the EntryNode, but we can exit
1917 // early if we find all remaining work is associated with just one operand as
1918 // no further pruning is possible.
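// For example, if Ops contains a store St and a load Ld and St's chain
// reaches Ld, then Ld is a redundant operand of the token factor: anything
// ordered after the token factor is already ordered after Ld through St.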
1920 // List of nodes to search through and original Ops from which they originate.
1921 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1922 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1923 SmallPtrSet<SDNode *, 16> SeenChains;
1924 bool DidPruneOps = false;
1926 unsigned NumLeftToConsider = 0;
1927 for (const SDValue &Op : Ops) {
1928 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1929 OpWorkCount.push_back(1);
1932 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1933 // If this is an Op, we can remove the op from the list. Re-mark any
1934 // search associated with it as being from the current OpNumber.
1935 if (SeenOps.contains(Op)) {
1936 Changed = true;
1937 DidPruneOps = true;
1938 unsigned OrigOpNumber = 0;
1939 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1940 OrigOpNumber++;
1941 assert((OrigOpNumber != Ops.size()) &&
1942 "expected to find TokenFactor Operand");
1943 // Re-mark worklist from OrigOpNumber to OpNumber
1944 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1945 if (Worklist[i].second == OrigOpNumber) {
1946 Worklist[i].second = OpNumber;
1949 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1950 OpWorkCount[OrigOpNumber] = 0;
1951 NumLeftToConsider--;
1953 // Add if it's a new chain
1954 if (SeenChains.insert(Op).second) {
1955 OpWorkCount[OpNumber]++;
1956 Worklist.push_back(std::make_pair(Op, OpNumber));
1960 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1961 // We need to consider at least 2 Ops to prune.
1962 if (NumLeftToConsider <= 1)
1963 break;
1964 auto CurNode = Worklist[i].first;
1965 auto CurOpNumber = Worklist[i].second;
1966 assert((OpWorkCount[CurOpNumber] > 0) &&
1967 "Node should not appear in worklist");
1968 switch (CurNode->getOpcode()) {
1969 case ISD::EntryToken:
1970 // Hitting EntryToken is the only way for the search to terminate without
1971 // hitting another operand's search. Prevent us from marking this operand
1972 // considered.
1974 NumLeftToConsider++;
1975 break;
1976 case ISD::TokenFactor:
1977 for (const SDValue &Op : CurNode->op_values())
1978 AddToWorklist(i, Op.getNode(), CurOpNumber);
1979 break;
1980 case ISD::LIFETIME_START:
1981 case ISD::LIFETIME_END:
1982 case ISD::CopyFromReg:
1983 case ISD::CopyToReg:
1984 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1985 break;
1986 default:
1987 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1988 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1989 break;
1991 OpWorkCount[CurOpNumber]--;
1992 if (OpWorkCount[CurOpNumber] == 0)
1993 NumLeftToConsider--;
1996 // If we've changed things around then replace token factor.
1997 if (Changed) {
1998 SDValue Result;
1999 if (Ops.empty()) {
2000 // The entry token is the only possible outcome.
2001 Result = DAG.getEntryNode();
2002 } else {
2003 if (DidPruneOps) {
2004 SmallVector<SDValue, 8> PrunedOps;
2006 for (const SDValue &Op : Ops) {
2007 if (SeenChains.count(Op.getNode()) == 0)
2008 PrunedOps.push_back(Op);
2010 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2011 } else {
2012 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2015 return Result;
2017 return SDValue();
2020 /// MERGE_VALUES can always be eliminated.
2021 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2022 WorklistRemover DeadNodes(*this);
2023 // Replacing results may cause a different MERGE_VALUES to suddenly
2024 // be CSE'd with N, and carry its uses with it. Iterate until no
2025 // uses remain, to ensure that the node can be safely deleted.
2026 // First add the users of this node to the work list so that they
2027 // can be tried again once they have new operands.
2028 AddUsersToWorklist(N);
2029 do {
2030 // Do as a single replacement to avoid rewalking use lists.
2031 SmallVector<SDValue, 8> Ops;
2032 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2033 Ops.push_back(N->getOperand(i));
2034 DAG.ReplaceAllUsesWith(N, Ops.data());
2035 } while (!N->use_empty());
2036 deleteAndRecombine(N);
2037 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2040 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2041 /// ConstantSDNode pointer else nullptr.
2042 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2043 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2044 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2047 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2048 /// and that N may be folded in the load / store addressing mode.
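// For example, if N is (add BasePtr, 16) and Use is a load with N as its base
// pointer, this asks the target whether a [reg + 16] addressing mode is legal
// for the loaded type, in which case the add can be folded into the access.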
2049 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2050 const TargetLowering &TLI) {
2051 EVT VT;
2052 unsigned AS;
2054 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2055 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2056 return false;
2057 VT = LD->getMemoryVT();
2058 AS = LD->getAddressSpace();
2059 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2060 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2061 return false;
2062 VT = ST->getMemoryVT();
2063 AS = ST->getAddressSpace();
2064 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2065 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2066 return false;
2067 VT = LD->getMemoryVT();
2068 AS = LD->getAddressSpace();
2069 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2070 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2071 return false;
2072 VT = ST->getMemoryVT();
2073 AS = ST->getAddressSpace();
2074 } else
2075 return false;
2077 TargetLowering::AddrMode AM;
2078 if (N->getOpcode() == ISD::ADD) {
2079 AM.HasBaseReg = true;
2080 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2081 if (Offset)
2082 // [reg +/- imm]
2083 AM.BaseOffs = Offset->getSExtValue();
2084 else
2085 // [reg +/- reg]
2086 AM.Scale = 1;
2087 } else if (N->getOpcode() == ISD::SUB) {
2088 AM.HasBaseReg = true;
2089 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2090 if (Offset)
2091 // [reg +/- imm]
2092 AM.BaseOffs = -Offset->getSExtValue();
2093 else
2094 // [reg +/- reg]
2095 AM.Scale = 1;
2096 } else
2097 return false;
2099 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2100 VT.getTypeForEVT(*DAG.getContext()), AS);
2103 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2104 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2105 "Unexpected binary operator");
2107 // Don't do this unless the old select is going away. We want to eliminate the
2108 // binary operator, not replace a binop with a select.
2109 // TODO: Handle ISD::SELECT_CC.
2110 unsigned SelOpNo = 0;
2111 SDValue Sel = BO->getOperand(0);
2112 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2113 SelOpNo = 1;
2114 Sel = BO->getOperand(1);
2117 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2118 return SDValue();
2120 SDValue CT = Sel.getOperand(1);
2121 if (!isConstantOrConstantVector(CT, true) &&
2122 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2123 return SDValue();
2125 SDValue CF = Sel.getOperand(2);
2126 if (!isConstantOrConstantVector(CF, true) &&
2127 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2128 return SDValue();
2130 // Bail out if any constants are opaque because we can't constant fold those.
2131 // The exception is "and" and "or" with either 0 or -1 in which case we can
2132 // propagate non constant operands into select. I.e.:
2133 // and (select Cond, 0, -1), X --> select Cond, 0, X
2134 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2135 auto BinOpcode = BO->getOpcode();
2136 bool CanFoldNonConst =
2137 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2138 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2139 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2141 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2142 if (!CanFoldNonConst &&
2143 !isConstantOrConstantVector(CBO, true) &&
2144 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2145 return SDValue();
2147 EVT VT = BO->getValueType(0);
2149 // We have a select-of-constants followed by a binary operator with a
2150 // constant. Eliminate the binop by pulling the constant math into the select.
2151 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
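// For instance, add (select Cond, 3, 7), 5 becomes select Cond, 8, 12, and
// the add disappears entirely.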
2152 SDLoc DL(Sel);
2153 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2154 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2155 if (!CanFoldNonConst && !NewCT.isUndef() &&
2156 !isConstantOrConstantVector(NewCT, true) &&
2157 !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2158 return SDValue();
2160 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2161 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2162 if (!CanFoldNonConst && !NewCF.isUndef() &&
2163 !isConstantOrConstantVector(NewCF, true) &&
2164 !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2165 return SDValue();
2167 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2168 SelectOp->setFlags(BO->getFlags());
2169 return SelectOp;
2172 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2173 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2174 "Expecting add or sub");
2176 // Match a constant operand and a zext operand for the math instruction:
2177 // add Z, C
2178 // sub C, Z
2179 bool IsAdd = N->getOpcode() == ISD::ADD;
2180 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2181 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2182 auto *CN = dyn_cast<ConstantSDNode>(C);
2183 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2184 return SDValue();
2186 // Match the zext operand as a setcc of a boolean.
2187 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2188 Z.getOperand(0).getValueType() != MVT::i1)
2189 return SDValue();
2191 // Match the compare as: setcc (X & 1), 0, eq.
2192 SDValue SetCC = Z.getOperand(0);
2193 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2194 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2195 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2196 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2197 return SDValue();
2199 // We are adding/subtracting a constant and an inverted low bit. Turn that
2200 // into a subtract/add of the low bit with incremented/decremented constant:
2201 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2202 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
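// Worked example for the add form with C = 5: when (X & 1) == 0 the setcc is
// true, the zext is 1 and the result is 6; the rewrite computes 6 - 0 = 6.
// When (X & 1) == 1 the zext is 0 and the result is 5; the rewrite gives
// 6 - 1 = 5.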
2203 EVT VT = C.getValueType();
2204 SDLoc DL(N);
2205 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2206 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2207 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2208 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2211 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2212 /// a shift and add with a different constant.
2213 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2214 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2215 "Expecting add or sub");
2217 // We need a constant operand for the add/sub, and the other operand is a
2218 // logical shift right: add (srl), C or sub C, (srl).
2219 bool IsAdd = N->getOpcode() == ISD::ADD;
2220 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2221 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2222 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2223 ShiftOp.getOpcode() != ISD::SRL)
2224 return SDValue();
2226 // The shift must be of a 'not' value.
2227 SDValue Not = ShiftOp.getOperand(0);
2228 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2229 return SDValue();
2231 // The shift must be moving the sign bit to the least-significant-bit.
2232 EVT VT = ShiftOp.getValueType();
2233 SDValue ShAmt = ShiftOp.getOperand(1);
2234 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2235 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2236 return SDValue();
2238 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2239 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2240 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
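// Why the add form works: srl (not X), 31 is 1 when X is non-negative and 0
// when X is negative, so the original produces C+1 or C. sra X, 31 is 0 or -1
// for the same two cases, so (sra X, 31) + (C + 1) yields the same values.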
2241 SDLoc DL(N);
2242 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2243 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2244 if (SDValue NewC =
2245 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2246 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2247 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2248 return SDValue();
2251 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2252 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2253 /// are no common bits set in the operands).
2254 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2255 SDValue N0 = N->getOperand(0);
2256 SDValue N1 = N->getOperand(1);
2257 EVT VT = N0.getValueType();
2258 SDLoc DL(N);
2260 // fold (add x, undef) -> undef
2261 if (N0.isUndef())
2262 return N0;
2263 if (N1.isUndef())
2264 return N1;
2266 // fold (add c1, c2) -> c1+c2
2267 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2268 return C;
2270 // canonicalize constant to RHS
2271 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2272 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2273 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2275 // fold vector ops
2276 if (VT.isVector()) {
2277 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2278 return FoldedVOp;
2280 // fold (add x, 0) -> x, vector edition
2281 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2282 return N0;
2285 // fold (add x, 0) -> x
2286 if (isNullConstant(N1))
2287 return N0;
2289 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290 // fold ((A-c1)+c2) -> (A+(c2-c1))
2291 if (N0.getOpcode() == ISD::SUB &&
2292 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293 SDValue Sub =
2294 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295 assert(Sub && "Constant folding failed");
2296 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2299 // fold ((c1-A)+c2) -> (c1+c2)-A
2300 if (N0.getOpcode() == ISD::SUB &&
2301 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302 SDValue Add =
2303 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304 assert(Add && "Constant folding failed");
2305 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2308 // add (sext i1 X), 1 -> zext (not i1 X)
2309 // We don't transform this pattern:
2310 // add (zext i1 X), -1 -> sext (not i1 X)
2311 // because most (?) targets generate better code for the zext form.
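// This is sound because sext i1 X is 0 or -1; adding 1 yields 1 or 0, which
// is exactly zext (not X): 1 when X is false, 0 when X is true.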
2312 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313 isOneOrOneSplat(N1)) {
2314 SDValue X = N0.getOperand(0);
2315 if ((!LegalOperations ||
2316 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318 X.getScalarValueSizeInBits() == 1) {
2319 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2324 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325 // equivalent to (add x, c0).
2326 if (N0.getOpcode() == ISD::OR &&
2327 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330 {N1, N0.getOperand(1)}))
2331 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2335 if (SDValue NewSel = foldBinOpIntoSelect(N))
2336 return NewSel;
2338 // reassociate add
2339 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341 return RADD;
2343 // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344 // equivalent to (add x, c).
2345 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346 if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349 return DAG.getNode(ISD::ADD, DL, VT,
2350 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351 N0.getOperand(1));
2353 return SDValue();
2355 if (SDValue Add = ReassociateAddOr(N0, N1))
2356 return Add;
2357 if (SDValue Add = ReassociateAddOr(N1, N0))
2358 return Add;
2360 // fold ((0-A) + B) -> B-A
2361 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2364 // fold (A + (0-B)) -> A-B
2365 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2368 // fold (A+(B-A)) -> B
2369 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370 return N1.getOperand(0);
2372 // fold ((B-A)+A) -> B
2373 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374 return N0.getOperand(0);
2376 // fold ((A-B)+(C-A)) -> (C-B)
2377 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378 N0.getOperand(0) == N1.getOperand(1))
2379 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380 N0.getOperand(1));
2382 // fold ((A-B)+(B-C)) -> (A-C)
2383 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384 N0.getOperand(1) == N1.getOperand(0))
2385 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386 N1.getOperand(1));
2388 // fold (A+(B-(A+C))) to (B-C)
2389 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390 N0 == N1.getOperand(1).getOperand(0))
2391 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392 N1.getOperand(1).getOperand(1));
2394 // fold (A+(B-(C+A))) to (B-C)
2395 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396 N0 == N1.getOperand(1).getOperand(1))
2397 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398 N1.getOperand(1).getOperand(0));
2400 // fold (A+((B-A)+or-C)) to (B+or-C)
2401 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402 N1.getOperand(0).getOpcode() == ISD::SUB &&
2403 N0 == N1.getOperand(0).getOperand(1))
2404 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405 N1.getOperand(1));
2407 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409 SDValue N00 = N0.getOperand(0);
2410 SDValue N01 = N0.getOperand(1);
2411 SDValue N10 = N1.getOperand(0);
2412 SDValue N11 = N1.getOperand(1);
2414 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415 return DAG.getNode(ISD::SUB, DL, VT,
2416 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2420 // fold (add (umax X, C), -C) --> (usubsat X, C)
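// This is a saturating subtract in disguise: umax(X, C) - C is X - C when
// X >= C and 0 otherwise, which is precisely usubsat(X, C).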
2421 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423 return (!Max && !Op) ||
2424 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2426 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427 /*AllowUndefs*/ true))
2428 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429 N0.getOperand(1));
2432 if (SimplifyDemandedBits(SDValue(N, 0)))
2433 return SDValue(N, 0);
2435 if (isOneOrOneSplat(N1)) {
2436 // fold (add (xor a, -1), 1) -> (sub 0, a)
2437 if (isBitwiseNot(N0))
2438 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439 N0.getOperand(0));
2441 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442 if (N0.getOpcode() == ISD::ADD) {
2443 SDValue A, Xor;
2445 if (isBitwiseNot(N0.getOperand(0))) {
2446 A = N0.getOperand(1);
2447 Xor = N0.getOperand(0);
2448 } else if (isBitwiseNot(N0.getOperand(1))) {
2449 A = N0.getOperand(0);
2450 Xor = N0.getOperand(1);
2453 if (Xor)
2454 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2457 // Look for:
2458 // add (add x, y), 1
2459 // And if the target does not like this form then turn into:
2460 // sub y, (xor x, -1)
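// In two's complement (xor x, -1) is -x - 1, so sub y, (xor x, -1) computes
// y + x + 1, the same value as the original add-of-add-plus-one.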
2461 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462 N0.getOpcode() == ISD::ADD) {
2463 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464 DAG.getAllOnesConstant(DL, VT));
2465 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2469 // (x - y) + -1 -> add (xor y, -1), x
2470 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471 isAllOnesOrAllOnesSplat(N1)) {
2472 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2476 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477 return Combined;
2479 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480 return Combined;
2482 return SDValue();
2485 SDValue DAGCombiner::visitADD(SDNode *N) {
2486 SDValue N0 = N->getOperand(0);
2487 SDValue N1 = N->getOperand(1);
2488 EVT VT = N0.getValueType();
2489 SDLoc DL(N);
2491 if (SDValue Combined = visitADDLike(N))
2492 return Combined;
2494 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495 return V;
2497 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498 return V;
2500 // fold (a+b) -> (a|b) iff a and b share no bits.
2501 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502 DAG.haveNoCommonBitsSet(N0, N1))
2503 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2505 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507 const APInt &C0 = N0->getConstantOperandAPInt(0);
2508 const APInt &C1 = N1->getConstantOperandAPInt(0);
2509 return DAG.getVScale(DL, VT, C0 + C1);
2512 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513 if ((N0.getOpcode() == ISD::ADD) &&
2514 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515 (N1.getOpcode() == ISD::VSCALE)) {
2516 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517 const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2522 // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524 N1.getOpcode() == ISD::STEP_VECTOR) {
2525 const APInt &C0 = N0->getConstantOperandAPInt(0);
2526 const APInt &C1 = N1->getConstantOperandAPInt(0);
2527 APInt NewStep = C0 + C1;
2528 return DAG.getStepVector(DL, VT, NewStep);
2531 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532 if ((N0.getOpcode() == ISD::ADD) &&
2533 (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534 (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536 const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537 APInt NewStep = SV0 + SV1;
2538 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2542 return SDValue();
2545 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546 unsigned Opcode = N->getOpcode();
2547 SDValue N0 = N->getOperand(0);
2548 SDValue N1 = N->getOperand(1);
2549 EVT VT = N0.getValueType();
2550 SDLoc DL(N);
2552 // fold (add_sat x, undef) -> -1
2553 if (N0.isUndef() || N1.isUndef())
2554 return DAG.getAllOnesConstant(DL, VT);
2556 // fold (add_sat c1, c2) -> c3
2557 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2558 return C;
2560 // canonicalize constant to RHS
2561 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2562 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2563 return DAG.getNode(Opcode, DL, VT, N1, N0);
2565 // fold vector ops
2566 if (VT.isVector()) {
2567 // TODO SimplifyVBinOp
2569 // fold (add_sat x, 0) -> x, vector edition
2570 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2571 return N0;
2574 // fold (add_sat x, 0) -> x
2575 if (isNullConstant(N1))
2576 return N0;
2578 // If it cannot overflow, transform into an add.
2579 if (Opcode == ISD::UADDSAT)
2580 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2581 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583 return SDValue();
2586 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2587 bool Masked = false;
2589 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2590 while (true) {
2591 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2592 V = V.getOperand(0);
2593 continue;
2596 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2597 Masked = true;
2598 V = V.getOperand(0);
2599 continue;
2602 break;
2605 // If this is not a carry, return.
2606 if (V.getResNo() != 1)
2607 return SDValue();
2609 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2610 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2611 return SDValue();
2613 EVT VT = V.getNode()->getValueType(0);
2614 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2615 return SDValue();
2617 // If the result is masked, then no matter what kind of bool it is we can
2618 // return. If it isn't, then we need to make sure the bool type is either 0 or
2619 // 1 and not other values.
2620 if (Masked ||
2621 TLI.getBooleanContents(V.getValueType()) ==
2622 TargetLoweringBase::ZeroOrOneBooleanContent)
2623 return V;
2625 return SDValue();
2628 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2629 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2630 /// the opcode and bypass the mask operation.
2631 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2632 SelectionDAG &DAG, const SDLoc &DL) {
2633 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2634 return SDValue();
2636 EVT VT = N0.getValueType();
2637 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2638 return SDValue();
2640 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2641 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2642 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2645 /// Helper for doing combines based on N0 and N1 being added to each other.
2646 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2647 SDNode *LocReference) {
2648 EVT VT = N0.getValueType();
2649 SDLoc DL(LocReference);
2651 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2652 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2653 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2654 return DAG.getNode(ISD::SUB, DL, VT, N0,
2655 DAG.getNode(ISD::SHL, DL, VT,
2656 N1.getOperand(0).getOperand(1),
2657 N1.getOperand(1)));
2659 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2660 return V;
2662 // Look for:
2663 // add (add x, 1), y
2664 // And if the target does not like this form then turn into:
2665 // sub y, (xor x, -1)
2666 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2667 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2668 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2669 DAG.getAllOnesConstant(DL, VT));
2670 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2673 // Hoist one-use subtraction by non-opaque constant:
2674 // (x - C) + y -> (x + y) - C
2675 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2676 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2677 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2678 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2679 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681 // Hoist one-use subtraction from non-opaque constant:
2682 // (C - x) + y -> (y - x) + C
2683 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2684 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2685 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2686 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2689 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2690 // rather than 'add 0/-1' (the zext should get folded).
2691 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2692 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2693 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2694 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2695 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2696 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2699 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2700 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2701 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2702 if (TN->getVT() == MVT::i1) {
2703 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2704 DAG.getConstant(1, DL, VT));
2705 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2709 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2710 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2711 N1.getResNo() == 0)
2712 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2713 N0, N1.getOperand(0), N1.getOperand(2));
2715 // (add X, Carry) -> (addcarry X, 0, Carry)
2716 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2717 if (SDValue Carry = getAsCarry(TLI, N1))
2718 return DAG.getNode(ISD::ADDCARRY, DL,
2719 DAG.getVTList(VT, Carry.getValueType()), N0,
2720 DAG.getConstant(0, DL, VT), Carry);
2722 return SDValue();
2725 SDValue DAGCombiner::visitADDC(SDNode *N) {
2726 SDValue N0 = N->getOperand(0);
2727 SDValue N1 = N->getOperand(1);
2728 EVT VT = N0.getValueType();
2729 SDLoc DL(N);
2731 // If the flag result is dead, turn this into an ADD.
2732 if (!N->hasAnyUseOfValue(1))
2733 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2734 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736 // canonicalize constant to RHS.
2737 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2738 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2739 if (N0C && !N1C)
2740 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742 // fold (addc x, 0) -> x + no carry out
2743 if (isNullConstant(N1))
2744 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2745 DL, MVT::Glue));
2747 // If it cannot overflow, transform into an add.
2748 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2749 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2750 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752 return SDValue();
2756 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2757 * then the flip also occurs if computing the inverse is the same cost.
2758 * This function returns an empty SDValue in case it cannot flip the boolean
2759 * without increasing the cost of the computation. If you want to flip a boolean
2760 * no matter what, use DAG.getLogicalNOT.
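 * For example, under ZeroOrOneBooleanContent the value (xor b, 1) is already
 * the logical negation of b, so requesting a flip of it simply returns b
 * without creating any new nodes.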
2762 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2763 const TargetLowering &TLI,
2764 bool Force) {
2765 if (Force && isa<ConstantSDNode>(V))
2766 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768 if (V.getOpcode() != ISD::XOR)
2769 return SDValue();
2771 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2772 if (!Const)
2773 return SDValue();
2775 EVT VT = V.getValueType();
2777 bool IsFlip = false;
2778 switch(TLI.getBooleanContents(VT)) {
2779 case TargetLowering::ZeroOrOneBooleanContent:
2780 IsFlip = Const->isOne();
2781 break;
2782 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2783 IsFlip = Const->isAllOnes();
2784 break;
2785 case TargetLowering::UndefinedBooleanContent:
2786 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2787 break;
2790 if (IsFlip)
2791 return V.getOperand(0);
2792 if (Force)
2793 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2794 return SDValue();
2797 SDValue DAGCombiner::visitADDO(SDNode *N) {
2798 SDValue N0 = N->getOperand(0);
2799 SDValue N1 = N->getOperand(1);
2800 EVT VT = N0.getValueType();
2801 bool IsSigned = (ISD::SADDO == N->getOpcode());
2803 EVT CarryVT = N->getValueType(1);
2804 SDLoc DL(N);
2806 // If the flag result is dead, turn this into an ADD.
2807 if (!N->hasAnyUseOfValue(1))
2808 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2809 DAG.getUNDEF(CarryVT));
2811 // canonicalize constant to RHS.
2812 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2813 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2814 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816 // fold (addo x, 0) -> x + no carry out
2817 if (isNullOrNullSplat(N1))
2818 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820 if (!IsSigned) {
2821 // If it cannot overflow, transform into an add.
2822 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2823 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2824 DAG.getConstant(0, DL, CarryVT));
2826 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2827 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2828 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2829 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2830 return CombineTo(
2831 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2834 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2835 return Combined;
2837 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2838 return Combined;
2841 return SDValue();
2844 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2845 EVT VT = N0.getValueType();
2846 if (VT.isVector())
2847 return SDValue();
2849 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2850 // If Y + 1 cannot overflow.
2851 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2852 SDValue Y = N1.getOperand(0);
2853 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2854 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2855 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2856 N1.getOperand(2));
2859 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2860 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2861 if (SDValue Carry = getAsCarry(TLI, N1))
2862 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2863 DAG.getConstant(0, SDLoc(N), VT), Carry);
2865 return SDValue();
2868 SDValue DAGCombiner::visitADDE(SDNode *N) {
2869 SDValue N0 = N->getOperand(0);
2870 SDValue N1 = N->getOperand(1);
2871 SDValue CarryIn = N->getOperand(2);
2873 // canonicalize constant to RHS
2874 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2875 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2876 if (N0C && !N1C)
2877 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2878 N1, N0, CarryIn);
2880 // fold (adde x, y, false) -> (addc x, y)
2881 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2882 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884 return SDValue();
2887 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2888 SDValue N0 = N->getOperand(0);
2889 SDValue N1 = N->getOperand(1);
2890 SDValue CarryIn = N->getOperand(2);
2891 SDLoc DL(N);
2893 // canonicalize constant to RHS
2894 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2895 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2896 if (N0C && !N1C)
2897 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899 // fold (addcarry x, y, false) -> (uaddo x, y)
2900 if (isNullConstant(CarryIn)) {
2901 if (!LegalOperations ||
2902 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2903 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2906 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2907 if (isNullConstant(N0) && isNullConstant(N1)) {
2908 EVT VT = N0.getValueType();
2909 EVT CarryVT = CarryIn.getValueType();
2910 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2911 AddToWorklist(CarryExt.getNode());
2912 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2913 DAG.getConstant(1, DL, VT)),
2914 DAG.getConstant(0, DL, CarryVT));
2917 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2918 return Combined;
2920 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2921 return Combined;
2923 return SDValue();
2926 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2927 SDValue N0 = N->getOperand(0);
2928 SDValue N1 = N->getOperand(1);
2929 SDValue CarryIn = N->getOperand(2);
2930 SDLoc DL(N);
2932 // canonicalize constant to RHS
2933 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2934 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2935 if (N0C && !N1C)
2936 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938 // fold (saddo_carry x, y, false) -> (saddo x, y)
2939 if (isNullConstant(CarryIn)) {
2940 if (!LegalOperations ||
2941 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2942 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2945 return SDValue();
2949 * If we are facing some sort of diamond carry propagation pattern, try to
2950 * break it up to generate something like:
2951 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2953 * The end result is usually an increase in the number of operations required, but
2954 * because the carry is now linearized, other transforms can kick in and optimize the DAG.
2956 * Patterns typically look something like
2957 * (uaddo A, B)
2958 * / \
2959 * Carry Sum
2960 * | \
2961 * | (addcarry *, 0, Z)
2962 * | /
2963 * \ Carry
2964 * | /
2965 * (addcarry X, *, *)
2967 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2968 * produce a combine with a single path for carry propagation.
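 * A typical source of this shape is multi-limb arithmetic, where a uaddo's sum
 * feeds one addcarry while its carry and that addcarry's carry both feed a
 * further addcarry for the next limb.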
2970 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2971 SDValue X, SDValue Carry0, SDValue Carry1,
2972 SDNode *N) {
2973 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2974 return SDValue();
2975 if (Carry1.getOpcode() != ISD::UADDO)
2976 return SDValue();
2978 SDValue Z;
2981 * First look for a suitable Z. It will present itself in the form of
2982 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2985 isNullConstant(Carry0.getOperand(1))) {
2986 Z = Carry0.getOperand(2);
2987 } else if (Carry0.getOpcode() == ISD::UADDO &&
2988 isOneConstant(Carry0.getOperand(1))) {
2989 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2990 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2991 } else {
2992 // We couldn't find a suitable Z.
2993 return SDValue();
2997 auto cancelDiamond = [&](SDValue A,SDValue B) {
2998 SDLoc DL(N);
2999 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3000 Combiner.AddToWorklist(NewY.getNode());
3001 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3002 DAG.getConstant(0, DL, X.getValueType()),
3003 NewY.getValue(1));
3007 * (uaddo A, B)
3009 * Sum
3011 * (addcarry *, 0, Z)
3013 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3014 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3018 * (addcarry A, 0, Z)
3020 * Sum
3022 * (uaddo *, B)
3024 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3025 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3028 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3029 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3032 return SDValue();
3035 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3036 // match patterns like:
3038 // (uaddo A, B) CarryIn
3039 // | \ |
3040 // | \ |
3041 // PartialSum PartialCarryOutX /
3042 // | | /
3043 // | ____|____________/
3044 // | / |
3045 // (uaddo *, *) \________
3046 // | \ \
3047 // | \ |
3048 // | PartialCarryOutY |
3049 // | \ |
3050 // | \ /
3051 // AddCarrySum | ______/
3052 // | /
3053 // CarryOut = (or *, *)
3055 // And generate ADDCARRY (or SUBCARRY) with two result values:
3057 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3060 // a single path for carry/borrow out propagation:
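// A concrete instance is the upper limb of a wide unsigned add: one uaddo adds
// the two upper limbs, a second uaddo adds the zero-extended carry from the
// lower limb into that partial sum, and the two partial carry-outs are ORed.
// That whole cluster collapses into a single addcarry node.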
3061 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3062 const TargetLowering &TLI, SDValue Carry0,
3063 SDValue Carry1, SDNode *N) {
3064 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3065 return SDValue();
3066 unsigned Opcode = Carry0.getOpcode();
3067 if (Opcode != Carry1.getOpcode())
3068 return SDValue();
3069 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3070 return SDValue();
3072 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3073 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3074 // the above ASCII art.)
3075 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3076 Carry1.getOperand(1) != Carry0.getValue(0))
3077 std::swap(Carry0, Carry1);
3078 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3079 Carry1.getOperand(1) != Carry0.getValue(0))
3080 return SDValue();
3082 // The carry in value must be on the right-hand side for subtraction.
3083 unsigned CarryInOperandNum =
3084 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3085 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3086 return SDValue();
3087 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3090 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3091 return SDValue();
3093 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3094 // TODO: make getAsCarry() aware of how partial carries are merged.
3095 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3096 return SDValue();
3097 CarryIn = CarryIn.getOperand(0);
3098 if (CarryIn.getValueType() != MVT::i1)
3099 return SDValue();
3101 SDLoc DL(N);
3102 SDValue Merged =
3103 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3104 Carry0.getOperand(1), CarryIn);
3106 // Note that because we have proven that the result of the UADDO/USUBO
3107 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we
3108 // can also prove that if the first UADDO/USUBO overflows, the second
3109 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3110 // maximum value.
3112 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3113 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115 // This is important because it means that OR and XOR can be used to merge
3116 // carry flags; and that AND can return a constant zero.
3118 // TODO: match other operations that can merge flags (ADD, etc)
3119 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3120 if (N->getOpcode() == ISD::AND)
3121 return DAG.getConstant(0, DL, MVT::i1);
3122 return Merged.getValue(1);
3125 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3126 SDNode *N) {
3127 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3128 if (isBitwiseNot(N0))
3129 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3130 SDLoc DL(N);
3131 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3132 N0.getOperand(0), NotC);
3133 return CombineTo(
3134 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3137 // Iff the flag result is dead:
3138 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3139 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3140 // or the dependency between the instructions.
3141 if ((N0.getOpcode() == ISD::ADD ||
3142 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3143 N0.getValue(1) != CarryIn)) &&
3144 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3145 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3146 N0.getOperand(0), N0.getOperand(1), CarryIn);
3149 * When one of the addcarry arguments is itself a carry, we may be facing
3150 * a diamond carry propagation, in which case we try to transform the DAG
3151 * to ensure linear carry propagation if that is possible.
3153 if (auto Y = getAsCarry(TLI, N1)) {
3154 // Because both are carries, Y and Z can be swapped.
3155 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3156 return R;
3157 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3158 return R;
3161 return SDValue();
3164 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3165 // clamp/truncation if necessary.
3166 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3167 SDValue RHS, SelectionDAG &DAG,
3168 const SDLoc &DL) {
3169 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3170 "Illegal truncation");
3172 if (DstVT == SrcVT)
3173 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3176 // clamping RHS.
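// For example, with SrcVT == i32, DstVT == i16 and LHS known to fit in 16
// bits, clamping RHS to 0xFFFF and truncating both operands gives an i16
// USUBSAT with the same value as the i32 one.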
3177 APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3178 DstVT.getScalarSizeInBits());
3179 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3180 return SDValue();
3182 SDValue SatLimit =
3183 DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3184 DstVT.getScalarSizeInBits()),
3185 DL, SrcVT);
3186 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3187 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3188 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3189 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3192 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3193 // usubsat(a,b), optionally as a truncated type.
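// umax(a,b) - b is a - b when a >= b and 0 otherwise, which is exactly
// usubsat(a,b); a - umin(a,b) behaves the same way.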
3194 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3195 if (N->getOpcode() != ISD::SUB ||
3196 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3197 return SDValue();
3199 EVT SubVT = N->getValueType(0);
3200 SDValue Op0 = N->getOperand(0);
3201 SDValue Op1 = N->getOperand(1);
3203 // Try to find umax(a,b) - b or a - umin(a,b) patterns
3204 // that may be converted to usubsat(a,b).
3205 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3206 SDValue MaxLHS = Op0.getOperand(0);
3207 SDValue MaxRHS = Op0.getOperand(1);
3208 if (MaxLHS == Op1)
3209 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3210 if (MaxRHS == Op1)
3211 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3214 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3215 SDValue MinLHS = Op1.getOperand(0);
3216 SDValue MinRHS = Op1.getOperand(1);
3217 if (MinLHS == Op0)
3218 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3219 if (MinRHS == Op0)
3220 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3223 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3224 if (Op1.getOpcode() == ISD::TRUNCATE &&
3225 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3226 Op1.getOperand(0).hasOneUse()) {
3227 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3228 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3229 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3230 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3231 DAG, SDLoc(N));
3232 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3233 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3234 DAG, SDLoc(N));
3237 return SDValue();
3240 // Since it may not be valid to emit a fold to zero for vector initializers,
3241 // check whether we can before folding.
3242 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3243 SelectionDAG &DAG, bool LegalOperations) {
3244 if (!VT.isVector())
3245 return DAG.getConstant(0, DL, VT);
3246 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3247 return DAG.getConstant(0, DL, VT);
3248 return SDValue();
3251 SDValue DAGCombiner::visitSUB(SDNode *N) {
3252 SDValue N0 = N->getOperand(0);
3253 SDValue N1 = N->getOperand(1);
3254 EVT VT = N0.getValueType();
3255 SDLoc DL(N);
3257 // fold (sub x, x) -> 0
3258 // FIXME: Refactor this and xor and other similar operations together.
3259 if (N0 == N1)
3260 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3262 // fold (sub c1, c2) -> c3
3263 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3264 return C;
3266 // fold vector ops
3267 if (VT.isVector()) {
3268 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3269 return FoldedVOp;
3271 // fold (sub x, 0) -> x, vector edition
3272 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3273 return N0;
3276 if (SDValue NewSel = foldBinOpIntoSelect(N))
3277 return NewSel;
3279 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281 // fold (sub x, c) -> (add x, -c)
3282 if (N1C) {
3283 return DAG.getNode(ISD::ADD, DL, VT, N0,
3284 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3287 if (isNullOrNullSplat(N0)) {
3288 unsigned BitWidth = VT.getScalarSizeInBits();
3289 // Right-shifting everything out but the sign bit followed by negation is
3290 // the same as flipping arithmetic/logical shift type without the negation:
3291 // -(X >>u 31) -> (X >>s 31)
3292 // -(X >>s 31) -> (X >>u 31)
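// E.g. for i32: if X is negative, X >>u 31 == 1 and 0 - 1 == -1 == X >>s 31;
// if X is non-negative, both shifts produce 0.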
3293 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3294 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3295 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3296 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3297 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3298 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3302 // 0 - X --> 0 if the sub is NUW.
3303 if (N->getFlags().hasNoUnsignedWrap())
3304 return N0;
3306 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3307 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3308 // N1 must be 0 because negating the minimum signed value is undefined.
3309 if (N->getFlags().hasNoSignedWrap())
3310 return N0;
3312 // 0 - X --> X if X is 0 or the minimum signed value.
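// E.g. for i8: 0 - 0 == 0 and 0 - (-128) wraps back to -128, so in both
// cases the result equals X.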
3313 return N1;
3316 // Convert 0 - abs(x).
3317 if (N1->getOpcode() == ISD::ABS &&
3318 !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3319 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3320 return Result;
3322 // Fold neg(splat(neg(x))) -> splat(x)
3323 if (VT.isVector()) {
3324 SDValue N1S = DAG.getSplatValue(N1, true);
3325 if (N1S && N1S.getOpcode() == ISD::SUB &&
3326 isNullConstant(N1S.getOperand(0))) {
3327 if (VT.isScalableVector())
3328 return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3329 return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3334 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3335 if (isAllOnesOrAllOnesSplat(N0))
3336 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3338 // fold (A - (0-B)) -> A+B
3339 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3340 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3342 // fold A-(A-B) -> B
3343 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3344 return N1.getOperand(1);
3346 // fold (A+B)-A -> B
3347 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3348 return N0.getOperand(1);
3350 // fold (A+B)-B -> A
3351 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3352 return N0.getOperand(0);
3354 // fold (A+C1)-C2 -> A+(C1-C2)
3355 if (N0.getOpcode() == ISD::ADD &&
3356 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3357 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3358 SDValue NewC =
3359 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3360 assert(NewC && "Constant folding failed");
3361 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364 // fold C2-(A+C1) -> (C2-C1)-A
3365 if (N1.getOpcode() == ISD::ADD) {
3366 SDValue N11 = N1.getOperand(1);
3367 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3368 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3369 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3370 assert(NewC && "Constant folding failed");
3371 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3375 // fold (A-C1)-C2 -> A-(C1+C2)
3376 if (N0.getOpcode() == ISD::SUB &&
3377 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3378 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3379 SDValue NewC =
3380 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3381 assert(NewC && "Constant folding failed");
3382 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385 // fold (c1-A)-c2 -> (c1-c2)-A
3386 if (N0.getOpcode() == ISD::SUB &&
3387 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3388 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3389 SDValue NewC =
3390 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3391 assert(NewC && "Constant folding failed");
3392 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395 // fold ((A+(B+or-C))-B) -> A+or-C
3396 if (N0.getOpcode() == ISD::ADD &&
3397 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3398 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3399 N0.getOperand(1).getOperand(0) == N1)
3400 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3401 N0.getOperand(1).getOperand(1));
3403 // fold ((A+(C+B))-B) -> A+C
3404 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3405 N0.getOperand(1).getOperand(1) == N1)
3406 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3407 N0.getOperand(1).getOperand(0));
3409 // fold ((A-(B-C))-C) -> A-B
3410 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3411 N0.getOperand(1).getOperand(1) == N1)
3412 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3413 N0.getOperand(1).getOperand(0));
3415 // fold (A-(B-C)) -> A+(C-B)
3416 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3417 return DAG.getNode(ISD::ADD, DL, VT, N0,
3418 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3419 N1.getOperand(0)));
3421 // A - (A & B) -> A & (~B)
3422 if (N1.getOpcode() == ISD::AND) {
3423 SDValue A = N1.getOperand(0);
3424 SDValue B = N1.getOperand(1);
3425 if (A != N0)
3426 std::swap(A, B);
3427 if (A == N0 &&
3428 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3429 SDValue InvB =
3430 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3431 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3435 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3436 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3437 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3438 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3439 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3440 N1.getOperand(0).getOperand(1),
3441 N1.getOperand(1));
3442 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3444 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3445 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3446 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3447 N1.getOperand(0),
3448 N1.getOperand(1).getOperand(1));
3449 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3453 // If either operand of a sub is undef, the result is undef
3454 if (N0.isUndef())
3455 return N0;
3456 if (N1.isUndef())
3457 return N1;
3459 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3460 return V;
3462 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3463 return V;
3465 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3466 return V;
3468 if (SDValue V = foldSubToUSubSat(VT, N))
3469 return V;
3471 // (x - y) - 1 -> add (xor y, -1), x
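// (xor y, -1) == -y - 1, so adding x yields x - y - 1.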
3472 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3473 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3474 DAG.getAllOnesConstant(DL, VT));
3475 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478 // Look for:
3479 // sub y, (xor x, -1)
3480 // And if the target does not like this form then turn into:
3481 // add (add x, y), 1
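// Since (xor x, -1) == -x - 1, y - (xor x, -1) == x + y + 1.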
3482 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3483 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3484 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487 // Hoist one-use addition by non-opaque constant:
3488 // (x + C) - y -> (x - y) + C
3489 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3490 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3491 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3492 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3494 // y - (x + C) -> (y - x) - C
3495 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3496 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3497 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3498 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3500 // (x - C) - y -> (x - y) - C
3501 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3502 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3503 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3504 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3505 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3507 // (C - x) - y -> C - (x + y)
3508 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3509 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3510 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3511 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3515 // rather than 'sub 0/1' (the sext should get folded).
3516 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
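// zext i1 Y is 0 or 1 while sext i1 Y is 0 or -1, so X - zext(Y) and
// X + sext(Y) compute the same value.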
3517 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3518 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3519 TLI.getBooleanContents(VT) ==
3520 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3521 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3522 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3526 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3527 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3528 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3529 SDValue S0 = N1.getOperand(0);
3530 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3531 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3532 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3533 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3537 // If the relocation model supports it, consider symbol offsets.
3538 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3539 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3540 // fold (sub Sym, c) -> Sym-c
3541 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3542 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3543 GA->getOffset() -
3544 (uint64_t)N1C->getSExtValue());
3545 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3546 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3547 if (GA->getGlobal() == GB->getGlobal())
3548 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3549 DL, VT);
3552 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
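// The sign-extended i1 is 0 or -1 depending on bit 0 of Y, so subtracting it
// is the same as adding (and Y, 1).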
3553 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3554 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3555 if (TN->getVT() == MVT::i1) {
3556 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3557 DAG.getConstant(1, DL, VT));
3558 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3562 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3563 if (N1.getOpcode() == ISD::VSCALE) {
3564 const APInt &IntVal = N1.getConstantOperandAPInt(0);
3565 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3569 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3570 APInt NewStep = -N1.getConstantOperandAPInt(0);
3571 return DAG.getNode(ISD::ADD, DL, VT, N0,
3572 DAG.getStepVector(DL, VT, NewStep));
3575 // Prefer an add for more folding potential and possibly better codegen:
3576 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
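// Both shifts isolate the sign bit: lshr yields 0 or 1, ashr yields 0 or -1,
// so subtracting the former is the same as adding the latter.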
3577 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3578 SDValue ShAmt = N1.getOperand(1);
3579 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3580 if (ShAmtC &&
3581 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3582 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3583 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3587 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3588 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3589 if (SDValue Carry = getAsCarry(TLI, N0)) {
3590 SDValue X = N1;
3591 SDValue Zero = DAG.getConstant(0, DL, VT);
3592 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3593 return DAG.getNode(ISD::ADDCARRY, DL,
3594 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3595 Carry);
3599 return SDValue();
3602 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3603 SDValue N0 = N->getOperand(0);
3604 SDValue N1 = N->getOperand(1);
3605 EVT VT = N0.getValueType();
3606 SDLoc DL(N);
3608 // fold (sub_sat x, undef) -> 0
3609 if (N0.isUndef() || N1.isUndef())
3610 return DAG.getConstant(0, DL, VT);
3612 // fold (sub_sat x, x) -> 0
3613 if (N0 == N1)
3614 return DAG.getConstant(0, DL, VT);
3616 // fold (sub_sat c1, c2) -> c3
3617 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3618 return C;
3620 // fold vector ops
3621 if (VT.isVector()) {
3622 // TODO SimplifyVBinOp
3624 // fold (sub_sat x, 0) -> x, vector edition
3625 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3626 return N0;
3629 // fold (sub_sat x, 0) -> x
3630 if (isNullConstant(N1))
3631 return N0;
3633 return SDValue();
3636 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3637 SDValue N0 = N->getOperand(0);
3638 SDValue N1 = N->getOperand(1);
3639 EVT VT = N0.getValueType();
3640 SDLoc DL(N);
3642 // If the flag result is dead, turn this into an SUB.
3643 if (!N->hasAnyUseOfValue(1))
3644 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3645 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3647 // fold (subc x, x) -> 0 + no borrow
3648 if (N0 == N1)
3649 return CombineTo(N, DAG.getConstant(0, DL, VT),
3650 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3652 // fold (subc x, 0) -> x + no borrow
3653 if (isNullConstant(N1))
3654 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3656 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3657 if (isAllOnesConstant(N0))
3658 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3659 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3661 return SDValue();
3664 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3665 SDValue N0 = N->getOperand(0);
3666 SDValue N1 = N->getOperand(1);
3667 EVT VT = N0.getValueType();
3668 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3670 EVT CarryVT = N->getValueType(1);
3671 SDLoc DL(N);
3673 // If the flag result is dead, turn this into an SUB.
3674 if (!N->hasAnyUseOfValue(1))
3675 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3676 DAG.getUNDEF(CarryVT));
3678 // fold (subo x, x) -> 0 + no borrow
3679 if (N0 == N1)
3680 return CombineTo(N, DAG.getConstant(0, DL, VT),
3681 DAG.getConstant(0, DL, CarryVT));
3683 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3685 // fold (subo x, c) -> (addo x, -c)
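// Negating the minimum signed value overflows (e.g. -(-128) is not
// representable in i8), so that case is excluded below.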
3686 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3687 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3688 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691 // fold (subo x, 0) -> x + no borrow
3692 if (isNullOrNullSplat(N1))
3693 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3695 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3696 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3697 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3698 DAG.getConstant(0, DL, CarryVT));
3700 return SDValue();
3703 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3704 SDValue N0 = N->getOperand(0);
3705 SDValue N1 = N->getOperand(1);
3706 SDValue CarryIn = N->getOperand(2);
3708 // fold (sube x, y, false) -> (subc x, y)
3709 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3710 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3712 return SDValue();
3715 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3716 SDValue N0 = N->getOperand(0);
3717 SDValue N1 = N->getOperand(1);
3718 SDValue CarryIn = N->getOperand(2);
3720 // fold (subcarry x, y, false) -> (usubo x, y)
3721 if (isNullConstant(CarryIn)) {
3722 if (!LegalOperations ||
3723 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3724 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727 return SDValue();
3730 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3731 SDValue N0 = N->getOperand(0);
3732 SDValue N1 = N->getOperand(1);
3733 SDValue CarryIn = N->getOperand(2);
3735 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3736 if (isNullConstant(CarryIn)) {
3737 if (!LegalOperations ||
3738 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3739 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742 return SDValue();
3745 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3746 // UMULFIXSAT here.
3747 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3748 SDValue N0 = N->getOperand(0);
3749 SDValue N1 = N->getOperand(1);
3750 SDValue Scale = N->getOperand(2);
3751 EVT VT = N0.getValueType();
3753 // fold (mulfix x, undef, scale) -> 0
3754 if (N0.isUndef() || N1.isUndef())
3755 return DAG.getConstant(0, SDLoc(N), VT);
3757 // Canonicalize constant to RHS (vector doesn't have to splat)
3758 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3759 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3760 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3762 // fold (mulfix x, 0, scale) -> 0
3763 if (isNullConstant(N1))
3764 return DAG.getConstant(0, SDLoc(N), VT);
3766 return SDValue();
3769 SDValue DAGCombiner::visitMUL(SDNode *N) {
3770 SDValue N0 = N->getOperand(0);
3771 SDValue N1 = N->getOperand(1);
3772 EVT VT = N0.getValueType();
3774 // fold (mul x, undef) -> 0
3775 if (N0.isUndef() || N1.isUndef())
3776 return DAG.getConstant(0, SDLoc(N), VT);
3778 // fold (mul c1, c2) -> c1*c2
3779 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3780 return C;
3782 // canonicalize constant to RHS (vector doesn't have to splat)
3783 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3784 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3785 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3787 bool N1IsConst = false;
3788 bool N1IsOpaqueConst = false;
3789 APInt ConstValue1;
3791 // fold vector ops
3792 if (VT.isVector()) {
3793 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3794 return FoldedVOp;
3796 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3797 assert((!N1IsConst ||
3798 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3799 "Splat APInt should be element width");
3800 } else {
3801 N1IsConst = isa<ConstantSDNode>(N1);
3802 if (N1IsConst) {
3803 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3804 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3808 // fold (mul x, 0) -> 0
3809 if (N1IsConst && ConstValue1.isZero())
3810 return N1;
3812 // fold (mul x, 1) -> x
3813 if (N1IsConst && ConstValue1.isOne())
3814 return N0;
3816 if (SDValue NewSel = foldBinOpIntoSelect(N))
3817 return NewSel;
3819 // fold (mul x, -1) -> 0-x
3820 if (N1IsConst && ConstValue1.isAllOnes()) {
3821 SDLoc DL(N);
3822 return DAG.getNode(ISD::SUB, DL, VT,
3823 DAG.getConstant(0, DL, VT), N0);
3826 // fold (mul x, (1 << c)) -> x << c
3827 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3828 DAG.isKnownToBeAPowerOfTwo(N1) &&
3829 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3830 SDLoc DL(N);
3831 SDValue LogBase2 = BuildLogBase2(N1, DL);
3832 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3833 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3834 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3837 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3838 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3839 unsigned Log2Val = (-ConstValue1).logBase2();
3840 SDLoc DL(N);
3841 // FIXME: If the input is something that is easily negated (e.g. a
3842 // single-use add), we should put the negate there.
3843 return DAG.getNode(ISD::SUB, DL, VT,
3844 DAG.getConstant(0, DL, VT),
3845 DAG.getNode(ISD::SHL, DL, VT, N0,
3846 DAG.getConstant(Log2Val, DL,
3847 getShiftAmountTy(N0.getValueType()))));
3850 // Try to transform:
3851 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3852 // mul x, (2^N + 1) --> add (shl x, N), x
3853 // mul x, (2^N - 1) --> sub (shl x, N), x
3854 // Examples: x * 33 --> (x << 5) + x
3855 // x * 15 --> (x << 4) - x
3856 // x * -33 --> -((x << 5) + x)
3857 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3858 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3859 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3860 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3861 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3862 // x * 0xf800 --> (x << 16) - (x << 11)
3863 // x * -0x8800 --> -((x << 15) + (x << 11))
3864 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3865 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3866 // TODO: We could handle more general decomposition of any constant by
3867 // having the target set a limit on number of ops and making a
3868 // callback to determine that sequence (similar to sqrt expansion).
3869 unsigned MathOp = ISD::DELETED_NODE;
3870 APInt MulC = ConstValue1.abs();
3871 // The constant `2` should be treated as (2^0 + 1).
3872 unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3873 MulC.lshrInPlace(TZeros);
3874 if ((MulC - 1).isPowerOf2())
3875 MathOp = ISD::ADD;
3876 else if ((MulC + 1).isPowerOf2())
3877 MathOp = ISD::SUB;
3879 if (MathOp != ISD::DELETED_NODE) {
3880 unsigned ShAmt =
3881 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3882 ShAmt += TZeros;
3883 assert(ShAmt < VT.getScalarSizeInBits() &&
3884 "multiply-by-constant generated out of bounds shift");
3885 SDLoc DL(N);
3886 SDValue Shl =
3887 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3888 SDValue R =
3889 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3890 DAG.getNode(ISD::SHL, DL, VT, N0,
3891 DAG.getConstant(TZeros, DL, VT)))
3892 : DAG.getNode(MathOp, DL, VT, Shl, N0);
3893 if (ConstValue1.isNegative())
3894 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3895 return R;
3899 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3900 if (N0.getOpcode() == ISD::SHL &&
3901 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3902 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3903 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3904 if (isConstantOrConstantVector(C3))
3905 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3908 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3909 // use.
3911 SDValue Sh, Y;
3913 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3914 if (N0.getOpcode() == ISD::SHL &&
3915 isConstantOrConstantVector(N0.getOperand(1)) &&
3916 N0.getNode()->hasOneUse()) {
3917 Sh = N0; Y = N1;
3918 } else if (N1.getOpcode() == ISD::SHL &&
3919 isConstantOrConstantVector(N1.getOperand(1)) &&
3920 N1.getNode()->hasOneUse()) {
3921 Sh = N1; Y = N0;
3924 if (Sh.getNode()) {
3925 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3926 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3930 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3931 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3932 N0.getOpcode() == ISD::ADD &&
3933 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3934 isMulAddWithConstProfitable(N, N0, N1))
3935 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3936 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3937 N0.getOperand(0), N1),
3938 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3939 N0.getOperand(1), N1));
3941 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3942 if (N0.getOpcode() == ISD::VSCALE)
3943 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3944 const APInt &C0 = N0.getConstantOperandAPInt(0);
3945 const APInt &C1 = NC1->getAPIntValue();
3946 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3949 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3950 APInt MulVal;
3951 if (N0.getOpcode() == ISD::STEP_VECTOR)
3952 if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3953 const APInt &C0 = N0.getConstantOperandAPInt(0);
3954 APInt NewStep = C0 * MulVal;
3955 return DAG.getStepVector(SDLoc(N), VT, NewStep);
3958 // Fold, per vector element,
3959 // (mul x, 0/undef) -> 0 and (mul x, 1) -> x
3960 // into and(x, mask).
3961 // We can replace vectors with '0' and '1' factors with a clearing mask.
3962 if (VT.isFixedLengthVector()) {
3963 unsigned NumElts = VT.getVectorNumElements();
3964 SmallBitVector ClearMask;
3965 ClearMask.reserve(NumElts);
3966 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3967 if (!V || V->isZero()) {
3968 ClearMask.push_back(true);
3969 return true;
3971 ClearMask.push_back(false);
3972 return V->isOne();
3974 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3975 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3976 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3977 SDLoc DL(N);
3978 EVT LegalSVT = N1.getOperand(0).getValueType();
3979 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3980 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3981 SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3982 for (unsigned I = 0; I != NumElts; ++I)
3983 if (ClearMask[I])
3984 Mask[I] = Zero;
3985 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3989 // reassociate mul
3990 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3991 return RMUL;
3993 return SDValue();
3996 /// Return true if divmod libcall is available.
3997 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3998 const TargetLowering &TLI) {
3999 RTLIB::Libcall LC;
4000 EVT NodeType = Node->getValueType(0);
4001 if (!NodeType.isSimple())
4002 return false;
4003 switch (NodeType.getSimpleVT().SimpleTy) {
4004 default: return false; // No libcall for vector types.
4005 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4006 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4007 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4008 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4009 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012 return TLI.getLibcallName(LC) != nullptr;
4015 /// Issue divrem if both quotient and remainder are needed.
4016 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4017 if (Node->use_empty())
4018 return SDValue(); // This is a dead node, leave it alone.
4020 unsigned Opcode = Node->getOpcode();
4021 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4022 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4024 // DivMod libcalls can still work on non-legal types.
4025 EVT VT = Node->getValueType(0);
4026 if (VT.isVector() || !VT.isInteger())
4027 return SDValue();
4029 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4030 return SDValue();
4032 // If DIVREM is going to get expanded into a libcall,
4033 // but there is no libcall available, then don't combine.
4034 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4035 !isDivRemLibcallAvailable(Node, isSigned, TLI))
4036 return SDValue();
4038 // If div is legal, it's better to do the normal expansion
4039 unsigned OtherOpcode = 0;
4040 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4041 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4042 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4043 return SDValue();
4044 } else {
4045 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4046 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4047 return SDValue();
4050 SDValue Op0 = Node->getOperand(0);
4051 SDValue Op1 = Node->getOperand(1);
4052 SDValue combined;
4053 for (SDNode *User : Op0.getNode()->uses()) {
4054 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4055 User->use_empty())
4056 continue;
4057 // Convert the other matching node(s), too;
4058 // otherwise, the DIVREM may get target-legalized into something
4059 // target-specific that we won't be able to recognize.
4060 unsigned UserOpc = User->getOpcode();
4061 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4062 User->getOperand(0) == Op0 &&
4063 User->getOperand(1) == Op1) {
4064 if (!combined) {
4065 if (UserOpc == OtherOpcode) {
4066 SDVTList VTs = DAG.getVTList(VT, VT);
4067 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4068 } else if (UserOpc == DivRemOpc) {
4069 combined = SDValue(User, 0);
4070 } else {
4071 assert(UserOpc == Opcode);
4072 continue;
4075 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4076 CombineTo(User, combined);
4077 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4078 CombineTo(User, combined.getValue(1));
4081 return combined;
4084 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4085 SDValue N0 = N->getOperand(0);
4086 SDValue N1 = N->getOperand(1);
4087 EVT VT = N->getValueType(0);
4088 SDLoc DL(N);
4090 unsigned Opc = N->getOpcode();
4091 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4092 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4094 // X / undef -> undef
4095 // X % undef -> undef
4096 // X / 0 -> undef
4097 // X % 0 -> undef
4098 // NOTE: This includes vectors where any divisor element is zero/undef.
4099 if (DAG.isUndef(Opc, {N0, N1}))
4100 return DAG.getUNDEF(VT);
4102 // undef / X -> 0
4103 // undef % X -> 0
4104 if (N0.isUndef())
4105 return DAG.getConstant(0, DL, VT);
4107 // 0 / X -> 0
4108 // 0 % X -> 0
4109 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4110 if (N0C && N0C->isZero())
4111 return N0;
4113 // X / X -> 1
4114 // X % X -> 0
4115 if (N0 == N1)
4116 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4118 // X / 1 -> X
4119 // X % 1 -> 0
4120 // If this is a boolean op (single-bit element type), we can't have
4121 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4122 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4123 // it's a 1.
4124 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4125 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4127 return SDValue();
4130 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4131 SDValue N0 = N->getOperand(0);
4132 SDValue N1 = N->getOperand(1);
4133 EVT VT = N->getValueType(0);
4134 EVT CCVT = getSetCCResultType(VT);
4135 SDLoc DL(N);
4137 // fold (sdiv c1, c2) -> c1/c2
4138 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4139 return C;
4141 // fold vector ops
4142 if (VT.isVector())
4143 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4144 return FoldedVOp;
4146 // fold (sdiv X, -1) -> 0-X
4147 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4148 if (N1C && N1C->isAllOnes())
4149 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4151 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4152 if (N1C && N1C->getAPIntValue().isMinSignedValue())
4153 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4154 DAG.getConstant(1, DL, VT),
4155 DAG.getConstant(0, DL, VT));
4157 if (SDValue V = simplifyDivRem(N, DAG))
4158 return V;
4160 if (SDValue NewSel = foldBinOpIntoSelect(N))
4161 return NewSel;
4163 // If we know the sign bits of both operands are zero, strength reduce to a
4164 // udiv instead. Handles (X&15) /s 4 -> (X&15) >> 2
4165 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4166 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4168 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4169 // If the corresponding remainder node exists, update its users with
4170 // (Dividend - (Quotient * Divisor).
4171 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4172 { N0, N1 })) {
4173 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4174 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4175 AddToWorklist(Mul.getNode());
4176 AddToWorklist(Sub.getNode());
4177 CombineTo(RemNode, Sub);
4179 return V;
4182 // sdiv, srem -> sdivrem
4183 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4184 // true. Otherwise, we break the simplification logic in visitREM().
4185 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4186 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4187 if (SDValue DivRem = useDivRem(N))
4188 return DivRem;
4190 return SDValue();
4193 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4194 SDLoc DL(N);
4195 EVT VT = N->getValueType(0);
4196 EVT CCVT = getSetCCResultType(VT);
4197 unsigned BitWidth = VT.getScalarSizeInBits();
4199 // Helper for determining whether a value is a power-of-2 constant scalar or
4200 // a vector of such elements.
4201 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4202 if (C->isZero() || C->isOpaque())
4203 return false;
4204 if (C->getAPIntValue().isPowerOf2())
4205 return true;
4206 if (C->getAPIntValue().isNegatedPowerOf2())
4207 return true;
4208 return false;
4211 // fold (sdiv X, pow2) -> simple ops after legalize
4212 // FIXME: We check for the exact bit here because the generic lowering gives
4213 // better results in that case. The target-specific lowering should learn how
4214 // to handle exact sdivs efficiently.
4215 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4216 // Target-specific implementation of sdiv x, pow2.
4217 if (SDValue Res = BuildSDIVPow2(N))
4218 return Res;
4220 // Create constants that are functions of the shift amount value.
4221 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4222 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4223 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4224 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4225 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4226 if (!isConstantOrConstantVector(Inexact))
4227 return SDValue();
4229 // Splat the sign bit into the register
4230 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4231 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4232 AddToWorklist(Sign.getNode());
4234 // Add (N0 < 0) ? abs2 - 1 : 0;
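// E.g. for (sdiv X, 4) on i32: Sign is 0 or -1, Srl is then 0 or 3, and
// (X + bias) >>s 2 rounds the quotient toward zero for negative X.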
4235 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4236 AddToWorklist(Srl.getNode());
4237 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4238 AddToWorklist(Add.getNode());
4239 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4240 AddToWorklist(Sra.getNode());
4242 // Special case: (sdiv X, 1) -> X
4243 // Special case: (sdiv X, -1) -> 0-X
4244 SDValue One = DAG.getConstant(1, DL, VT);
4245 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4246 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4247 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4248 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4249 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4251 // If dividing by a positive value, we're done. Otherwise, the result must
4252 // be negated.
4253 SDValue Zero = DAG.getConstant(0, DL, VT);
4254 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4256 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4257 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4258 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4259 return Res;
4262 // If integer divide is expensive and we satisfy the requirements, emit an
4263 // alternate sequence. Targets may check function attributes for size/speed
4264 // trade-offs.
4265 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4266 if (isConstantOrConstantVector(N1) &&
4267 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4268 if (SDValue Op = BuildSDIV(N))
4269 return Op;
4271 return SDValue();
4274 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4275 SDValue N0 = N->getOperand(0);
4276 SDValue N1 = N->getOperand(1);
4277 EVT VT = N->getValueType(0);
4278 EVT CCVT = getSetCCResultType(VT);
4279 SDLoc DL(N);
4281 // fold (udiv c1, c2) -> c1/c2
4282 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4283 return C;
4285 // fold vector ops
4286 if (VT.isVector())
4287 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4288 return FoldedVOp;
4290 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4291 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4292 if (N1C && N1C->isAllOnes())
4293 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4294 DAG.getConstant(1, DL, VT),
4295 DAG.getConstant(0, DL, VT));
4297 if (SDValue V = simplifyDivRem(N, DAG))
4298 return V;
4300 if (SDValue NewSel = foldBinOpIntoSelect(N))
4301 return NewSel;
4303 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4304 // If the corresponding remainder node exists, update its users with
4305 // (Dividend - (Quotient * Divisor).
4306 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4307 { N0, N1 })) {
4308 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4309 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4310 AddToWorklist(Mul.getNode());
4311 AddToWorklist(Sub.getNode());
4312 CombineTo(RemNode, Sub);
4314 return V;
4317 // udiv, urem -> udivrem
4318 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4319 // true. Otherwise, we break the simplification logic in visitREM().
4320 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4321 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4322 if (SDValue DivRem = useDivRem(N))
4323 return DivRem;
4325 return SDValue();
4328 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4329 SDLoc DL(N);
4330 EVT VT = N->getValueType(0);
4332 // fold (udiv x, (1 << c)) -> x >>u c
4333 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4334 DAG.isKnownToBeAPowerOfTwo(N1)) {
4335 SDValue LogBase2 = BuildLogBase2(N1, DL);
4336 AddToWorklist(LogBase2.getNode());
4338 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4339 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4340 AddToWorklist(Trunc.getNode());
4341 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4344 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
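// E.g. x udiv (8 << y) == x >>u (3 + y), since 8 << y == 1 << (3 + y).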
4345 if (N1.getOpcode() == ISD::SHL) {
4346 SDValue N10 = N1.getOperand(0);
4347 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4348 DAG.isKnownToBeAPowerOfTwo(N10)) {
4349 SDValue LogBase2 = BuildLogBase2(N10, DL);
4350 AddToWorklist(LogBase2.getNode());
4352 EVT ADDVT = N1.getOperand(1).getValueType();
4353 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4354 AddToWorklist(Trunc.getNode());
4355 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4356 AddToWorklist(Add.getNode());
4357 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4361 // fold (udiv x, c) -> alternate
4362 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4363 if (isConstantOrConstantVector(N1) &&
4364 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4365 if (SDValue Op = BuildUDIV(N))
4366 return Op;
4368 return SDValue();
4371 // handles ISD::SREM and ISD::UREM
4372 SDValue DAGCombiner::visitREM(SDNode *N) {
4373 unsigned Opcode = N->getOpcode();
4374 SDValue N0 = N->getOperand(0);
4375 SDValue N1 = N->getOperand(1);
4376 EVT VT = N->getValueType(0);
4377 EVT CCVT = getSetCCResultType(VT);
4379 bool isSigned = (Opcode == ISD::SREM);
4380 SDLoc DL(N);
4382 // fold (rem c1, c2) -> c1%c2
4383 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4384 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4385 return C;
4387 // fold (urem X, -1) -> select(X == -1, 0, x)
4388 if (!isSigned && N1C && N1C->isAllOnes())
4389 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4390 DAG.getConstant(0, DL, VT), N0);
4392 if (SDValue V = simplifyDivRem(N, DAG))
4393 return V;
4395 if (SDValue NewSel = foldBinOpIntoSelect(N))
4396 return NewSel;
4398 if (isSigned) {
4399 // If we know the sign bits of both operands are zero, strength reduce to a
4400 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4401 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4402 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4403 } else {
4404 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4405 // fold (urem x, pow2) -> (and x, pow2-1)
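// E.g. x urem 16 == x & 15.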
4406 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4407 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4408 AddToWorklist(Add.getNode());
4409 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4411 if (N1.getOpcode() == ISD::SHL &&
4412 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4413 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4414 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4415 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4416 AddToWorklist(Add.getNode());
4417 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4421 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4423 // If X/C can be simplified by the division-by-constant logic, lower
4424 // X%C to the equivalent of X-X/C*C.
4425 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4426 // speculative DIV must not cause a DIVREM conversion. We guard against this
4427 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4428 // combine will not return a DIVREM. Regardless, checking cheapness here
4429 // makes sense since the simplification results in fatter code.
4430 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4431 SDValue OptimizedDiv =
4432 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4433 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4434 // If the equivalent Div node also exists, update its users.
4435 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4436 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4437 { N0, N1 }))
4438 CombineTo(DivNode, OptimizedDiv);
4439 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4440 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4441 AddToWorklist(OptimizedDiv.getNode());
4442 AddToWorklist(Mul.getNode());
4443 return Sub;
4447 // sdiv, srem -> sdivrem / udiv, urem -> udivrem
4448 if (SDValue DivRem = useDivRem(N))
4449 return DivRem.getValue(1);
4451 return SDValue();
4454 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4455 SDValue N0 = N->getOperand(0);
4456 SDValue N1 = N->getOperand(1);
4457 EVT VT = N->getValueType(0);
4458 SDLoc DL(N);
4460 // fold (mulhs c1, c2)
4461 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4462 return C;
4464 // canonicalize constant to RHS.
4465 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4466 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4467 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4469 if (VT.isVector()) {
4470 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4471 return FoldedVOp;
4473 // fold (mulhs x, 0) -> 0
4474 // Do not return N0/N1 because an undef node may exist.
4475 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4476 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4477 return DAG.getConstant(0, DL, VT);
4480 // fold (mulhs x, 0) -> 0
4481 if (isNullConstant(N1))
4482 return N1;
4483 // fold (mulhs x, 1) -> (sra x, size(x)-1)
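// The high half of x * 1 is just the sign extension of x, i.e. copies of its
// sign bit.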
4484 if (isOneConstant(N1))
4485 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4486 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4487 getShiftAmountTy(N0.getValueType())));
4489 // fold (mulhs x, undef) -> 0
4490 if (N0.isUndef() || N1.isUndef())
4491 return DAG.getConstant(0, DL, VT);
4493 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4494 // plus a shift.
4495 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4496 !VT.isVector()) {
4497 MVT Simple = VT.getSimpleVT();
4498 unsigned SimpleSize = Simple.getSizeInBits();
4499 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4500 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4501 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4502 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4503 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4504 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4505 DAG.getConstant(SimpleSize, DL,
4506 getShiftAmountTy(N1.getValueType())));
4507 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4511 return SDValue();
4514 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4515 SDValue N0 = N->getOperand(0);
4516 SDValue N1 = N->getOperand(1);
4517 EVT VT = N->getValueType(0);
4518 SDLoc DL(N);
4520 // fold (mulhu c1, c2)
4521 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4522 return C;
4524 // canonicalize constant to RHS.
4525 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4526 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4527 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4529 if (VT.isVector()) {
4530 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4531 return FoldedVOp;
4533 // fold (mulhu x, 0) -> 0
4534 // Do not return N0/N1 because an undef node may exist.
4535 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4536 ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4537 return DAG.getConstant(0, DL, VT);
4540 // fold (mulhu x, 0) -> 0
4541 if (isNullConstant(N1))
4542 return N1;
4543 // fold (mulhu x, 1) -> 0
4544 if (isOneConstant(N1))
4545 return DAG.getConstant(0, DL, N0.getValueType());
4546 // fold (mulhu x, undef) -> 0
4547 if (N0.isUndef() || N1.isUndef())
4548 return DAG.getConstant(0, DL, VT);
4550 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
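// E.g. for i32, (mulhu x, 1 << 20) is the top 32 bits of the 64-bit product
// x * 2^20, which is x >>u 12.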
4551 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4552 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4553 unsigned NumEltBits = VT.getScalarSizeInBits();
4554 SDValue LogBase2 = BuildLogBase2(N1, DL);
4555 SDValue SRLAmt = DAG.getNode(
4556 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4557 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4558 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4559 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4562 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4563 // plus a shift.
4564 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4565 !VT.isVector()) {
4566 MVT Simple = VT.getSimpleVT();
4567 unsigned SimpleSize = Simple.getSizeInBits();
4568 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4569 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4570 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4571 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4572 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4573 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4574 DAG.getConstant(SimpleSize, DL,
4575 getShiftAmountTy(N1.getValueType())));
4576 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4580 // Simplify the operands using demanded-bits information.
4581 // We don't have demanded bits support for MULHU so this just enables constant
4582 // folding based on known bits.
4583 if (SimplifyDemandedBits(SDValue(N, 0)))
4584 return SDValue(N, 0);
4586 return SDValue();
4589 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4590 /// give the opcodes for the two computations that are being performed. Return
4591 /// the combined value if a simplification was made, or an empty SDValue otherwise.
4592 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4593 unsigned HiOp) {
4594 // If the high half is not needed, just compute the low half.
4595 bool HiExists = N->hasAnyUseOfValue(1);
4596 if (!HiExists && (!LegalOperations ||
4597 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4598 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4599 return CombineTo(N, Res, Res);
4602 // If the low half is not needed, just compute the high half.
4603 bool LoExists = N->hasAnyUseOfValue(0);
4604 if (!LoExists && (!LegalOperations ||
4605 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4606 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4607 return CombineTo(N, Res, Res);
4610 // If both halves are used, return as it is.
4611 if (LoExists && HiExists)
4612 return SDValue();
4614 // If the two computed results can be simplified separately, separate them.
4615 if (LoExists) {
4616 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4617 AddToWorklist(Lo.getNode());
4618 SDValue LoOpt = combine(Lo.getNode());
4619 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4620 (!LegalOperations ||
4621 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4622 return CombineTo(N, LoOpt, LoOpt);
4625 if (HiExists) {
4626 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4627 AddToWorklist(Hi.getNode());
4628 SDValue HiOpt = combine(Hi.getNode());
4629 if (HiOpt.getNode() && HiOpt != Hi &&
4630 (!LegalOperations ||
4631 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4632 return CombineTo(N, HiOpt, HiOpt);
4635 return SDValue();
4638 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4639 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4640 return Res;
4642 EVT VT = N->getValueType(0);
4643 SDLoc DL(N);
4645 // If the type twice as wide is legal, transform this multiply to a wider
4646 // multiply plus a shift.
4647 if (VT.isSimple() && !VT.isVector()) {
4648 MVT Simple = VT.getSimpleVT();
4649 unsigned SimpleSize = Simple.getSizeInBits();
4650 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4651 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4652 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4653 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4654 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4655 // Compute the high part as N1.
4656 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4657 DAG.getConstant(SimpleSize, DL,
4658 getShiftAmountTy(Lo.getValueType())));
4659 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4660 // Compute the low part as N0.
4661 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4662 return CombineTo(N, Lo, Hi);
4666 return SDValue();
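// Illustrative example (added for exposition, not part of the original
// source): for i16 operands on a target with a legal i32 multiply,
// (smul_lohi x, y) becomes
//   t  = (mul (sext x to i32), (sext y to i32))
//   hi = (trunc (srl t, 16)), lo = (trunc t).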
4669 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4670 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4671 return Res;
4673 EVT VT = N->getValueType(0);
4674 SDLoc DL(N);
4676 // (umul_lohi N0, 0) -> (0, 0)
4677 if (isNullConstant(N->getOperand(1))) {
4678 SDValue Zero = DAG.getConstant(0, DL, VT);
4679 return CombineTo(N, Zero, Zero);
4682 // (umul_lohi N0, 1) -> (N0, 0)
4683 if (isOneConstant(N->getOperand(1))) {
4684 SDValue Zero = DAG.getConstant(0, DL, VT);
4685 return CombineTo(N, N->getOperand(0), Zero);
4688 // If the type twice as wide is legal, transform the umul_lohi to a wider
4689 // multiply plus a shift.
4690 if (VT.isSimple() && !VT.isVector()) {
4691 MVT Simple = VT.getSimpleVT();
4692 unsigned SimpleSize = Simple.getSizeInBits();
4693 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4694 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4695 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4696 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4697 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4698 // Compute the high part as N1.
4699 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4700 DAG.getConstant(SimpleSize, DL,
4701 getShiftAmountTy(Lo.getValueType())));
4702 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4703 // Compute the low part as N0.
4704 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4705 return CombineTo(N, Lo, Hi);
4709 return SDValue();
4712 SDValue DAGCombiner::visitMULO(SDNode *N) {
4713 SDValue N0 = N->getOperand(0);
4714 SDValue N1 = N->getOperand(1);
4715 EVT VT = N0.getValueType();
4716 bool IsSigned = (ISD::SMULO == N->getOpcode());
4718 EVT CarryVT = N->getValueType(1);
4719 SDLoc DL(N);
4721 ConstantSDNode *N0C = isConstOrConstSplat(N0);
4722 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4724 // fold operation with constant operands.
4725 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4726 // multiple results.
4727 if (N0C && N1C) {
4728 bool Overflow;
4729 APInt Result =
4730 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4731 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4732 return CombineTo(N, DAG.getConstant(Result, DL, VT),
4733 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4736 // canonicalize constant to RHS.
4737 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4738 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4739 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4741 // fold (mulo x, 0) -> 0 + no carry out
4742 if (isNullOrNullSplat(N1))
4743 return CombineTo(N, DAG.getConstant(0, DL, VT),
4744 DAG.getConstant(0, DL, CarryVT));
4746 // (mulo x, 2) -> (addo x, x)
4747 if (N1C && N1C->getAPIntValue() == 2)
4748 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4749 N->getVTList(), N0, N0);
4751 if (IsSigned) {
4752 // A 1 bit SMULO overflows if both inputs are 1.
4753 if (VT.getScalarSizeInBits() == 1) {
4754 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4755 return CombineTo(N, And,
4756 DAG.getSetCC(DL, CarryVT, And,
4757 DAG.getConstant(0, DL, VT), ISD::SETNE));
4760 // Multiplying n * m significant bits yields a result of n + m significant
4761 // bits. If the total number of significant bits does not exceed the
4762 // result bit width (minus 1), there is no overflow.
4763 unsigned SignBits = DAG.ComputeNumSignBits(N0);
4764 if (SignBits > 1)
4765 SignBits += DAG.ComputeNumSignBits(N1);
4766 if (SignBits > VT.getScalarSizeInBits() + 1)
4767 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4768 DAG.getConstant(0, DL, CarryVT));
4769 } else {
4770 KnownBits N1Known = DAG.computeKnownBits(N1);
4771 KnownBits N0Known = DAG.computeKnownBits(N0);
4772 bool Overflow;
4773 (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4774 if (!Overflow)
4775 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4776 DAG.getConstant(0, DL, CarryVT));
4779 return SDValue();
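// Illustrative example (added for exposition, not part of the original
// source): (umulo i8 x, 2) is rewritten above to (uaddo i8 x, x); e.g. for
// x = 200 the sum 400 does not fit in 8 bits, so the carry result is set,
// matching the overflow of 200 * 2.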
4782 // Function to calculate whether the Min/Max pair of SDNodes (potentially
4783 // swapped around) make a signed saturate pattern, clamping to between a signed
4784 // saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW-1.
4785 // Returns the node being clamped and the bitwidth of the clamp in BW. Should
4786 // work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
4787 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
4788 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
4789 SDValue N3, ISD::CondCode CC, unsigned &BW,
4790 bool &Unsigned) {
4791 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4792 ISD::CondCode CC) {
4793 // The compare and select operand should be the same or the select operands
4794 // should be truncated versions of the comparison.
4795 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4796 return 0;
4797 // The constants need to be the same or a truncated version of each other.
4798 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4799 ConstantSDNode *N3C = isConstOrConstSplat(N3);
4800 if (!N1C || !N3C)
4801 return 0;
4802 const APInt &C1 = N1C->getAPIntValue();
4803 const APInt &C2 = N3C->getAPIntValue();
4804 if (C1.getBitWidth() < C2.getBitWidth() ||
4805 C1 != C2.sextOrSelf(C1.getBitWidth()))
4806 return 0;
4807 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4810 // Check the initial value is a SMIN/SMAX equivalent.
4811 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4812 if (!Opcode0)
4813 return SDValue();
4815 SDValue N00, N01, N02, N03;
4816 ISD::CondCode N0CC;
4817 switch (N0.getOpcode()) {
4818 case ISD::SMIN:
4819 case ISD::SMAX:
4820 N00 = N02 = N0.getOperand(0);
4821 N01 = N03 = N0.getOperand(1);
4822 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
4823 break;
4824 case ISD::SELECT_CC:
4825 N00 = N0.getOperand(0);
4826 N01 = N0.getOperand(1);
4827 N02 = N0.getOperand(2);
4828 N03 = N0.getOperand(3);
4829 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4830 break;
4831 case ISD::SELECT:
4832 case ISD::VSELECT:
4833 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4834 return SDValue();
4835 N00 = N0.getOperand(0).getOperand(0);
4836 N01 = N0.getOperand(0).getOperand(1);
4837 N02 = N0.getOperand(1);
4838 N03 = N0.getOperand(2);
4839 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
4840 break;
4841 default:
4842 return SDValue();
4845 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4846 if (!Opcode1 || Opcode0 == Opcode1)
4847 return SDValue();
4849 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
4850 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
4851 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
4852 return SDValue();
4854 const APInt &MinC = MinCOp->getAPIntValue();
4855 const APInt &MaxC = MaxCOp->getAPIntValue();
4856 APInt MinCPlus1 = MinC + 1;
4857 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
4858 BW = MinCPlus1.exactLogBase2() + 1;
4859 Unsigned = false;
4860 return N02;
4863 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
4864 BW = MinCPlus1.exactLogBase2();
4865 Unsigned = true;
4866 return N02;
4869 return SDValue();
4872 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
4873 SDValue N3, ISD::CondCode CC,
4874 SelectionDAG &DAG) {
4875 unsigned BW;
4876 bool Unsigned;
4877 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
4878 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
4879 return SDValue();
4880 EVT FPVT = Fp.getOperand(0).getValueType();
4881 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
4882 if (FPVT.isVector())
4883 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
4884 FPVT.getVectorElementCount());
4885 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
4886 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
4887 return SDValue();
4888 SDLoc DL(Fp);
4889 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
4890 DAG.getValueType(NewVT.getScalarType()));
4891 return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
4892 : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
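// Illustrative example (added for exposition, not part of the original
// source): for smin(smax((fp_to_sint f), -128), 127), isSaturatingMinMax
// reports BW = 8 and Unsigned = false (since -(-128) == 127 + 1 == 2^7), and
// the combine above emits an fp_to_sint_sat saturating to i8, sign-extended
// back to the original result type.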
4895 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4896 SDValue N0 = N->getOperand(0);
4897 SDValue N1 = N->getOperand(1);
4898 EVT VT = N0.getValueType();
4899 unsigned Opcode = N->getOpcode();
4900 SDLoc DL(N);
4902 // fold operation with constant operands.
4903 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4904 return C;
4906 // canonicalize constant to RHS
4907 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4908 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4909 return DAG.getNode(Opcode, DL, VT, N1, N0);
4911 // fold vector ops
4912 if (VT.isVector())
4913 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4914 return FoldedVOp;
4916 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4917 // Only do this if the current op isn't legal and the flipped is.
4918 if (!TLI.isOperationLegal(Opcode, VT) &&
4919 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4920 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4921 unsigned AltOpcode;
4922 switch (Opcode) {
4923 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4924 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4925 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4926 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4927 default: llvm_unreachable("Unknown MINMAX opcode");
4929 if (TLI.isOperationLegal(AltOpcode, VT))
4930 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
4933 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
4934 if (SDValue S = PerformMinMaxFpToSatCombine(
4935 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
4936 return S;
4938 // Simplify the operands using demanded-bits information.
4939 if (SimplifyDemandedBits(SDValue(N, 0)))
4940 return SDValue(N, 0);
4942 return SDValue();
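// Illustrative example (added for exposition, not part of the original
// source) of the flip above: if both operands of (smax a, b) are known
// non-negative (sign bit zero), SMAX is not legal, and UMAX is, the node is
// rewritten as (umax a, b), which is equivalent on non-negative values.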
4945 /// If this is a bitwise logic instruction and both operands have the same
4946 /// opcode, try to sink the other opcode after the logic instruction.
4947 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4948 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4949 EVT VT = N0.getValueType();
4950 unsigned LogicOpcode = N->getOpcode();
4951 unsigned HandOpcode = N0.getOpcode();
4952 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4953 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4954 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4956 // Bail early if none of these transforms apply.
4957 if (N0.getNumOperands() == 0)
4958 return SDValue();
4960 // FIXME: We should check number of uses of the operands to not increase
4961 // the instruction count for all transforms.
4963 // Handle size-changing casts.
4964 SDValue X = N0.getOperand(0);
4965 SDValue Y = N1.getOperand(0);
4966 EVT XVT = X.getValueType();
4967 SDLoc DL(N);
4968 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4969 HandOpcode == ISD::SIGN_EXTEND) {
4970 // If both operands have other uses, this transform would create extra
4971 // instructions without eliminating anything.
4972 if (!N0.hasOneUse() && !N1.hasOneUse())
4973 return SDValue();
4974 // We need matching integer source types.
4975 if (XVT != Y.getValueType())
4976 return SDValue();
4977 // Don't create an illegal op during or after legalization. Don't ever
4978 // create an unsupported vector op.
4979 if ((VT.isVector() || LegalOperations) &&
4980 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4981 return SDValue();
4982 // Avoid infinite looping with PromoteIntBinOp.
4983 // TODO: Should we apply desirable/legal constraints to all opcodes?
4984 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4985 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4986 return SDValue();
4987 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4988 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4989 return DAG.getNode(HandOpcode, DL, VT, Logic);
4992 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4993 if (HandOpcode == ISD::TRUNCATE) {
4994 // If both operands have other uses, this transform would create extra
4995 // instructions without eliminating anything.
4996 if (!N0.hasOneUse() && !N1.hasOneUse())
4997 return SDValue();
4998 // We need matching source types.
4999 if (XVT != Y.getValueType())
5000 return SDValue();
5001 // Don't create an illegal op during or after legalization.
5002 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5003 return SDValue();
5004 // Be extra careful sinking truncate. If it's free, there's no benefit in
5005 // widening a binop. Also, don't create a logic op on an illegal type.
5006 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5007 return SDValue();
5008 if (!TLI.isTypeLegal(XVT))
5009 return SDValue();
5010 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5011 return DAG.getNode(HandOpcode, DL, VT, Logic);
5014 // For binops SHL/SRL/SRA/AND:
5015 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5016 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5017 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5018 N0.getOperand(1) == N1.getOperand(1)) {
5019 // If either operand has other uses, this transform is not an improvement.
5020 if (!N0.hasOneUse() || !N1.hasOneUse())
5021 return SDValue();
5022 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5023 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5026 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5027 if (HandOpcode == ISD::BSWAP) {
5028 // If either operand has other uses, this transform is not an improvement.
5029 if (!N0.hasOneUse() || !N1.hasOneUse())
5030 return SDValue();
5031 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5032 return DAG.getNode(HandOpcode, DL, VT, Logic);
5035 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5036 // Only perform this optimization up until type legalization, before
5037 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5038 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5039 // we don't want to undo this promotion.
5040 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5041 // on scalars.
5042 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5043 Level <= AfterLegalizeTypes) {
5044 // Input types must be integer and the same.
5045 if (XVT.isInteger() && XVT == Y.getValueType() &&
5046 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5047 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5048 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5049 return DAG.getNode(HandOpcode, DL, VT, Logic);
5053 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5054 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5055 // If both shuffles use the same mask, and both shuffle within a single
5056 // vector, then it is worthwhile to move the swizzle after the operation.
5057 // The type-legalizer generates this pattern when loading illegal
5058 // vector types from memory. In many cases this allows additional shuffle
5059 // optimizations.
5060 // There are other cases where moving the shuffle after the xor/and/or
5061 // is profitable even if shuffles don't perform a swizzle.
5062 // If both shuffles use the same mask, and both shuffles have the same first
5063 // or second operand, then it might still be profitable to move the shuffle
5064 // after the xor/and/or operation.
5065 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5066 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5067 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5068 assert(X.getValueType() == Y.getValueType() &&
5069 "Inputs to shuffles are not the same type");
5071 // Check that both shuffles use the same mask. The masks are known to be of
5072 // the same length because the result vector type is the same.
5073 // Check also that shuffles have only one use to avoid introducing extra
5074 // instructions.
5075 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5076 !SVN0->getMask().equals(SVN1->getMask()))
5077 return SDValue();
5079 // Don't try to fold this node if it requires introducing a
5080 // build vector of all zeros that might be illegal at this stage.
5081 SDValue ShOp = N0.getOperand(1);
5082 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5083 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5085 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5086 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5087 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5088 N0.getOperand(0), N1.getOperand(0));
5089 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5092 // Don't try to fold this node if it requires introducing a
5093 // build vector of all zeros that might be illegal at this stage.
5094 ShOp = N0.getOperand(0);
5095 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5096 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5098 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5099 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5100 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5101 N1.getOperand(1));
5102 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5106 return SDValue();
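// Illustrative example (added for exposition, not part of the original
// source): the size-changing cast case above turns
// (and (zext i8 a to i32), (zext i8 b to i32)) into
// (zext (and i8 a, b) to i32), provided the one-use and legality checks are
// satisfied, so the logic op is performed in the narrower type.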
5109 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5110 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5111 const SDLoc &DL) {
5112 SDValue LL, LR, RL, RR, N0CC, N1CC;
5113 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5114 !isSetCCEquivalent(N1, RL, RR, N1CC))
5115 return SDValue();
5117 assert(N0.getValueType() == N1.getValueType() &&
5118 "Unexpected operand types for bitwise logic op");
5119 assert(LL.getValueType() == LR.getValueType() &&
5120 RL.getValueType() == RR.getValueType() &&
5121 "Unexpected operand types for setcc");
5123 // If we're here post-legalization or the logic op type is not i1, the logic
5124 // op type must match a setcc result type. Also, all folds require new
5125 // operations on the left and right operands, so those types must match.
5126 EVT VT = N0.getValueType();
5127 EVT OpVT = LL.getValueType();
5128 if (LegalOperations || VT.getScalarType() != MVT::i1)
5129 if (VT != getSetCCResultType(OpVT))
5130 return SDValue();
5131 if (OpVT != RL.getValueType())
5132 return SDValue();
5134 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5135 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5136 bool IsInteger = OpVT.isInteger();
5137 if (LR == RR && CC0 == CC1 && IsInteger) {
5138 bool IsZero = isNullOrNullSplat(LR);
5139 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5141 // All bits clear?
5142 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5143 // All sign bits clear?
5144 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5145 // Any bits set?
5146 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5147 // Any sign bits set?
5148 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5150 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5151 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5152 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5153 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5154 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5155 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5156 AddToWorklist(Or.getNode());
5157 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5160 // All bits set?
5161 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5162 // All sign bits set?
5163 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5164 // Any bits clear?
5165 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5166 // Any sign bits clear?
5167 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5169 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5170 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5171 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5172 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5173 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5174 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5175 AddToWorklist(And.getNode());
5176 return DAG.getSetCC(DL, VT, And, LR, CC1);
5180 // TODO: What is the 'or' equivalent of this fold?
5181 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5182 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5183 IsInteger && CC0 == ISD::SETNE &&
5184 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5185 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5186 SDValue One = DAG.getConstant(1, DL, OpVT);
5187 SDValue Two = DAG.getConstant(2, DL, OpVT);
5188 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5189 AddToWorklist(Add.getNode());
5190 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5193 // Try more general transforms if the predicates match and the only user of
5194 // the compares is the 'and' or 'or'.
5195 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5196 N0.hasOneUse() && N1.hasOneUse()) {
5197 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5198 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5199 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5200 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5201 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5202 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5203 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5204 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5207 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5208 // TODO - support non-uniform vector amounts.
5209 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5210 // Match a shared variable operand and 2 non-opaque constant operands.
5211 ConstantSDNode *C0 = isConstOrConstSplat(LR);
5212 ConstantSDNode *C1 = isConstOrConstSplat(RR);
5213 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5214 const APInt &CMax =
5215 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5216 const APInt &CMin =
5217 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5218 // The difference of the constants must be a single bit.
5219 if ((CMax - CMin).isPowerOf2()) {
5220 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5221 // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5222 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5223 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5224 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5225 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5226 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5227 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5228 SDValue Zero = DAG.getConstant(0, DL, OpVT);
5229 return DAG.getSetCC(DL, VT, And, Zero, CC0);
5235 // Canonicalize equivalent operands to LL == RL.
5236 if (LL == RR && LR == RL) {
5237 CC1 = ISD::getSetCCSwappedOperands(CC1);
5238 std::swap(RL, RR);
5241 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5242 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5243 if (LL == RL && LR == RR) {
5244 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5245 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5246 if (NewCC != ISD::SETCC_INVALID &&
5247 (!LegalOperations ||
5248 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5249 TLI.isOperationLegal(ISD::SETCC, OpVT))))
5250 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5253 return SDValue();
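// Illustrative example (added for exposition, not part of the original
// source) of the one-bit constant-difference fold above:
// (and (setne X, 5), (setne X, 4)) has CMax = 5, CMin = 4 with
// CMax - CMin = 1, and becomes (setne (and (sub X, 4), ~1), 0) on targets
// where convertSetCCLogicToBitwiseLogic returns true.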
5256 /// This contains all DAGCombine rules which reduce two values combined by
5257 /// an And operation to a single value. This makes them reusable in the context
5258 /// of visitSELECT(). Rules involving constants are not included as
5259 /// visitSELECT() already handles those cases.
5260 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5261 EVT VT = N1.getValueType();
5262 SDLoc DL(N);
5264 // fold (and x, undef) -> 0
5265 if (N0.isUndef() || N1.isUndef())
5266 return DAG.getConstant(0, DL, VT);
5268 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5269 return V;
5271 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5272 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5273 VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5274 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5275 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5276 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
5277 // immediate for an add, but becomes legal once its top c2 bits are set,
5278 // transform the ADD so the immediate doesn't need to be materialized
5279 // in a register.
5280 APInt ADDC = ADDI->getAPIntValue();
5281 APInt SRLC = SRLI->getAPIntValue();
5282 if (ADDC.getMinSignedBits() <= 64 &&
5283 SRLC.ult(VT.getSizeInBits()) &&
5284 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5285 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5286 SRLC.getZExtValue());
5287 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5288 ADDC |= Mask;
5289 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5290 SDLoc DL0(N0);
5291 SDValue NewAdd =
5292 DAG.getNode(ISD::ADD, DL0, VT,
5293 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5294 CombineTo(N0.getNode(), NewAdd);
5295 // Return N so it doesn't get rechecked!
5296 return SDValue(N, 0);
5304 // Reduce bit extract of low half of an integer to the narrower type.
5305 // (and (srl i64:x, K), KMask) ->
5306 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
5307 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5308 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5309 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5310 unsigned Size = VT.getSizeInBits();
5311 const APInt &AndMask = CAnd->getAPIntValue();
5312 unsigned ShiftBits = CShift->getZExtValue();
5314 // Bail out, this node will probably disappear anyway.
5315 if (ShiftBits == 0)
5316 return SDValue();
5318 unsigned MaskBits = AndMask.countTrailingOnes();
5319 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5321 if (AndMask.isMask() &&
5322 // Required bits must not span the two halves of the integer and
5323 // must fit in the half size type.
5324 (ShiftBits + MaskBits <= Size / 2) &&
5325 TLI.isNarrowingProfitable(VT, HalfVT) &&
5326 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5327 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5328 TLI.isTruncateFree(VT, HalfVT) &&
5329 TLI.isZExtFree(HalfVT, VT)) {
5330 // The isNarrowingProfitable is to avoid regressions on PPC and
5331 // AArch64 which match a few 64-bit bit insert / bit extract patterns
5332 // on downstream users of this. Those patterns could probably be
5333 // extended to handle extensions mixed in.
5335 SDLoc SL(N0);
5336 assert(MaskBits <= Size);
5338 // Extracting the highest bit of the low half.
5339 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5340 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5341 N0.getOperand(0));
5343 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5344 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5345 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5346 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5347 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5353 return SDValue();
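// Illustrative example (added for exposition, not part of the original
// source) of the narrowing fold above: on a target whose hooks
// (isNarrowingProfitable etc.) allow it, (and (srl i64:x, 8), 0xFF) becomes
// (zero_extend (and (srl (trunc i64:x to i32), 8), 0xFF) to i64), since the
// extracted byte lies entirely within the low 32 bits.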
5356 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5357 EVT LoadResultTy, EVT &ExtVT) {
5358 if (!AndC->getAPIntValue().isMask())
5359 return false;
5361 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5363 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5364 EVT LoadedVT = LoadN->getMemoryVT();
5366 if (ExtVT == LoadedVT &&
5367 (!LegalOperations ||
5368 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5369 // ZEXTLOAD will match without needing to change the size of the value being
5370 // loaded.
5371 return true;
5374 // Do not change the width of volatile or atomic loads.
5375 if (!LoadN->isSimple())
5376 return false;
5378 // Do not generate loads of non-round integer types since these can
5379 // be expensive (and would be wrong if the type is not byte sized).
5380 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5381 return false;
5383 if (LegalOperations &&
5384 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5385 return false;
5387 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5388 return false;
5390 return true;
5393 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5394 ISD::LoadExtType ExtType, EVT &MemVT,
5395 unsigned ShAmt) {
5396 if (!LDST)
5397 return false;
5398 // Only allow byte offsets.
5399 if (ShAmt % 8)
5400 return false;
5402 // Do not generate loads of non-round integer types since these can
5403 // be expensive (and would be wrong if the type is not byte sized).
5404 if (!MemVT.isRound())
5405 return false;
5407 // Don't change the width of volatile or atomic loads.
5408 if (!LDST->isSimple())
5409 return false;
5411 EVT LdStMemVT = LDST->getMemoryVT();
5413 // Bail out when changing the scalable property, since we can't be sure that
5414 // we're actually narrowing here.
5415 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5416 return false;
5418 // Verify that we are actually reducing a load width here.
5419 if (LdStMemVT.bitsLT(MemVT))
5420 return false;
5422 // Ensure that this isn't going to produce an unsupported memory access.
5423 if (ShAmt) {
5424 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5425 const unsigned ByteShAmt = ShAmt / 8;
5426 const Align LDSTAlign = LDST->getAlign();
5427 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5428 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5429 LDST->getAddressSpace(), NarrowAlign,
5430 LDST->getMemOperand()->getFlags()))
5431 return false;
5434 // It's not possible to generate a constant of extended or untyped type.
5435 EVT PtrType = LDST->getBasePtr().getValueType();
5436 if (PtrType == MVT::Untyped || PtrType.isExtended())
5437 return false;
5439 if (isa<LoadSDNode>(LDST)) {
5440 LoadSDNode *Load = cast<LoadSDNode>(LDST);
5441 // Don't transform one with multiple uses, this would require adding a new
5442 // load.
5443 if (!SDValue(Load, 0).hasOneUse())
5444 return false;
5446 if (LegalOperations &&
5447 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5448 return false;
5450 // For the transform to be legal, the load must produce only two values
5451 // (the value loaded and the chain). Don't transform a pre-increment
5452 // load, for example, which produces an extra value. Otherwise the
5453 // transformation is not equivalent, and the downstream logic to replace
5454 // uses gets things wrong.
5455 if (Load->getNumValues() > 2)
5456 return false;
5458 // If the load that we're shrinking is an extload and we're not just
5459 // discarding the extension we can't simply shrink the load. Bail.
5460 // TODO: It would be possible to merge the extensions in some cases.
5461 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5462 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5463 return false;
5465 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5466 return false;
5467 } else {
5468 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5469 StoreSDNode *Store = cast<StoreSDNode>(LDST);
5470 // Can't write outside the original store
5471 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5472 return false;
5474 if (LegalOperations &&
5475 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5476 return false;
5478 return true;
5481 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5482 SmallVectorImpl<LoadSDNode*> &Loads,
5483 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5484 ConstantSDNode *Mask,
5485 SDNode *&NodeToMask) {
5486 // Recursively search for the operands, looking for loads which can be
5487 // narrowed.
5488 for (SDValue Op : N->op_values()) {
5489 if (Op.getValueType().isVector())
5490 return false;
5492 // Some constants may need fixing up later if they are too large.
5493 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5494 if (Mask->getValueType(0) != C->getValueType(0))
5495 return false;
5496 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5497 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5498 NodesWithConsts.insert(N);
5499 continue;
5502 if (!Op.hasOneUse())
5503 return false;
5505 switch(Op.getOpcode()) {
5506 case ISD::LOAD: {
5507 auto *Load = cast<LoadSDNode>(Op);
5508 EVT ExtVT;
5509 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5510 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5512 // ZEXTLOAD is already small enough.
5513 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5514 ExtVT.bitsGE(Load->getMemoryVT()))
5515 continue;
5517 // Use LE to convert equal sized loads to zext.
5518 if (ExtVT.bitsLE(Load->getMemoryVT()))
5519 Loads.push_back(Load);
5521 continue;
5523 return false;
5525 case ISD::ZERO_EXTEND:
5526 case ISD::AssertZext: {
5527 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5528 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5529 EVT VT = Op.getOpcode() == ISD::AssertZext
5530 ? cast<VTSDNode>(Op.getOperand(1))->getVT()
5531 : Op.getOperand(0).getValueType();
5533 // We can accept extending nodes if the mask is wider than or equal in
5534 // width to the original type.
5535 if (ExtVT.bitsGE(VT))
5536 continue;
5537 break;
5539 case ISD::ANY_EXTEND: {
5540 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5541 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5542 EVT VT = Op.getOperand(0).getValueType();
5543 if (ExtVT.bitsGE(VT))
5544 break;
5545 // Fallthrough to searching for nodes from the operands of the extend.
5546 LLVM_FALLTHROUGH;
5548 case ISD::OR:
5549 case ISD::XOR:
5550 case ISD::AND:
5551 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5552 NodeToMask))
5553 return false;
5554 continue;
5557 // Allow one node which will be masked along with any loads found.
5558 if (NodeToMask)
5559 return false;
5561 // Also ensure that the node to be masked only produces one data result.
5562 NodeToMask = Op.getNode();
5563 if (NodeToMask->getNumValues() > 1) {
5564 bool HasValue = false;
5565 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5566 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5567 if (VT != MVT::Glue && VT != MVT::Other) {
5568 if (HasValue) {
5569 NodeToMask = nullptr;
5570 return false;
5572 HasValue = true;
5575 assert(HasValue && "Node to be masked has no data result?");
5578 return true;
5581 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5582 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5583 if (!Mask)
5584 return false;
5586 if (!Mask->getAPIntValue().isMask())
5587 return false;
5589 // No need to do anything if the and directly uses a load.
5590 if (isa<LoadSDNode>(N->getOperand(0)))
5591 return false;
5593 SmallVector<LoadSDNode*, 8> Loads;
5594 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5595 SDNode *FixupNode = nullptr;
5596 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5597 if (Loads.size() == 0)
5598 return false;
5600 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5601 SDValue MaskOp = N->getOperand(1);
5603 // If it exists, fixup the single node we allow in the tree that needs
5604 // masking.
5605 if (FixupNode) {
5606 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5607 SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode),
5608 FixupNode->getValueType(0));
5609 SDValue And =
5610 DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0),
5611 SDValue(FixupNode, 0), MaskOpT);
5612 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5613 if (And.getOpcode() == ISD::AND)
5614 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT);
5617 // Narrow any constants that need it.
5618 for (auto *LogicN : NodesWithConsts) {
5619 SDValue Op0 = LogicN->getOperand(0);
5620 SDValue Op1 = LogicN->getOperand(1);
5622 if (isa<ConstantSDNode>(Op0))
5623 std::swap(Op0, Op1);
5625 SDValue MaskOpT =
5626 DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType());
5627 SDValue And =
5628 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT);
5630 DAG.UpdateNodeOperands(LogicN, Op0, And);
5633 // Create narrow loads.
5634 for (auto *Load : Loads) {
5635 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5636 SDValue MaskOpT =
5637 DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0));
5638 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5639 SDValue(Load, 0), MaskOpT);
5640 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5641 if (And.getOpcode() == ISD::AND)
5642 And = SDValue(
5643 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0);
5644 SDValue NewLoad = reduceLoadWidth(And.getNode());
5645 assert(NewLoad &&
5646 "Shouldn't be masking the load if it can't be narrowed");
5647 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5649 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5650 return true;
5652 return false;
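// Illustrative example (added for exposition, not part of the original
// source): for (and (or (load i32 p), (load i32 q)), 0xFFFF), the mask is
// propagated back to both loads, which become i16 zextloads where the target
// allows it, and the outer 'and' is then replaced by the 'or' of the
// narrowed loads.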
5655 // Unfold
5656 // x & (-1 'logical shift' y)
5657 // To
5658 // (x 'opposite logical shift' y) 'logical shift' y
5659 // if it is better for performance.
5660 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5661 assert(N->getOpcode() == ISD::AND);
5663 SDValue N0 = N->getOperand(0);
5664 SDValue N1 = N->getOperand(1);
5666 // Do we actually prefer shifts over mask?
5667 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5668 return SDValue();
5670 // Try to match (-1 '[outer] logical shift' y)
5671 unsigned OuterShift;
5672 unsigned InnerShift; // The opposite direction to the OuterShift.
5673 SDValue Y; // Shift amount.
5674 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5675 if (!M.hasOneUse())
5676 return false;
5677 OuterShift = M->getOpcode();
5678 if (OuterShift == ISD::SHL)
5679 InnerShift = ISD::SRL;
5680 else if (OuterShift == ISD::SRL)
5681 InnerShift = ISD::SHL;
5682 else
5683 return false;
5684 if (!isAllOnesConstant(M->getOperand(0)))
5685 return false;
5686 Y = M->getOperand(1);
5687 return true;
5690 SDValue X;
5691 if (matchMask(N1))
5692 X = N0;
5693 else if (matchMask(N0))
5694 X = N1;
5695 else
5696 return SDValue();
5698 SDLoc DL(N);
5699 EVT VT = N->getValueType(0);
5701 // tmp = x 'opposite logical shift' y
5702 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5703 // ret = tmp 'logical shift' y
5704 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5706 return T1;
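// Illustrative example (added for exposition, not part of the original
// source): for i8 values with x = 0b10110110 and y = 3, x & (-1 << y) is
// 0b10110000; the unfolded form (x >> 3) << 3 gives 0b00010110 << 3 =
// 0b10110000, the same result without materializing the (-1 << y) mask.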
5709 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5710 /// For a target with a bit test, this is expected to become test + set and save
5711 /// at least 1 instruction.
5712 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5713 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5715 // This is probably not worthwhile without a supported type.
5716 EVT VT = And->getValueType(0);
5717 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5718 if (!TLI.isTypeLegal(VT))
5719 return SDValue();
5721 // Look through an optional extension and find a 'not'.
5722 // TODO: Should we favor test+set even without the 'not' op?
5723 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5724 if (Not.getOpcode() == ISD::ANY_EXTEND)
5725 Not = Not.getOperand(0);
5726 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5727 return SDValue();
5729 // Look through an optional truncation. The source operand may not be the same
5730 // type as the original 'and', but that is ok because we are masking off
5731 // everything but the low bit.
5732 SDValue Srl = Not.getOperand(0);
5733 if (Srl.getOpcode() == ISD::TRUNCATE)
5734 Srl = Srl.getOperand(0);
5736 // Match a shift-right by constant.
5737 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5738 !isa<ConstantSDNode>(Srl.getOperand(1)))
5739 return SDValue();
5741 // We might have looked through casts that make this transform invalid.
5742 // TODO: If the source type is wider than the result type, do the mask and
5743 // compare in the source type.
5744 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5745 unsigned VTBitWidth = VT.getSizeInBits();
5746 if (ShiftAmt.uge(VTBitWidth))
5747 return SDValue();
5749 // Turn this into a bit-test pattern using mask op + setcc:
5750 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5751 SDLoc DL(And);
5752 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5753 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5754 SDValue Mask = DAG.getConstant(
5755 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5756 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5757 SDValue Zero = DAG.getConstant(0, DL, VT);
5758 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5759 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5762 /// For targets that support usubsat, match a bit-hack form of that operation
5763 /// that ends in 'and' and convert it.
5764 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
5765 SDValue N0 = N->getOperand(0);
5766 SDValue N1 = N->getOperand(1);
5767 EVT VT = N1.getValueType();
5769 // Canonicalize SRA as operand 1.
5770 if (N0.getOpcode() == ISD::SRA)
5771 std::swap(N0, N1);
5773 // xor/add with SMIN (signmask) are logically equivalent.
5774 if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5775 return SDValue();
5777 if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5778 N0.getOperand(0) != N1.getOperand(0))
5779 return SDValue();
5781 unsigned BitWidth = VT.getScalarSizeInBits();
5782 ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
5783 ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
5784 if (!XorC || !XorC->getAPIntValue().isSignMask() ||
5785 !SraC || SraC->getAPIntValue() != BitWidth - 1)
5786 return SDValue();
5788 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
5789 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
5790 SDLoc DL(N);
5791 SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
5792 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
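// Illustrative example (added for exposition, not part of the original
// source): for i8 x = 0xD0 (208), (x ^ 0x80) & (x s>> 7) = 0x50 & 0xFF =
// 0x50 = 80, and usubsat(208, 128) = 80; for x = 0x30 (48) the arithmetic
// shift yields 0, matching usubsat(48, 128) = 0.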
5795 SDValue DAGCombiner::visitAND(SDNode *N) {
5796 SDValue N0 = N->getOperand(0);
5797 SDValue N1 = N->getOperand(1);
5798 EVT VT = N1.getValueType();
5800 // x & x --> x
5801 if (N0 == N1)
5802 return N0;
5804 // fold (and c1, c2) -> c1&c2
5805 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5806 return C;
5808 // canonicalize constant to RHS
5809 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5810 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5811 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5813 // fold vector ops
5814 if (VT.isVector()) {
5815 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
5816 return FoldedVOp;
5818 // fold (and x, 0) -> 0, vector edition
5819 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5820 // do not return N0, because undef node may exist in N0
5821 return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
5822 SDLoc(N), N0.getValueType());
5823 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5824 // do not return N1, because undef node may exist in N1
5825 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
5826 SDLoc(N), N1.getValueType());
5828 // fold (and x, -1) -> x, vector edition
5829 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5830 return N1;
5831 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5832 return N0;
5834 // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5835 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5836 auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5837 if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5838 N0.hasOneUse() && N1.hasOneUse()) {
5839 EVT LoadVT = MLoad->getMemoryVT();
5840 EVT ExtVT = VT;
5841 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5842 // For this AND to be a zero extension of the masked load the elements
5843 // of the BuildVec must mask the bottom bits of the extended element
5844 // type
5845 if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5846 uint64_t ElementSize =
5847 LoadVT.getVectorElementType().getScalarSizeInBits();
5848 if (Splat->getAPIntValue().isMask(ElementSize)) {
5849 return DAG.getMaskedLoad(
5850 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5851 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5852 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5853 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5860 // fold (and x, -1) -> x
5861 if (isAllOnesConstant(N1))
5862 return N0;
5864 // if (and x, c) is known to be zero, return 0
5865 unsigned BitWidth = VT.getScalarSizeInBits();
5866 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5867 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
5868 return DAG.getConstant(0, SDLoc(N), VT);
5870 if (SDValue NewSel = foldBinOpIntoSelect(N))
5871 return NewSel;
5873 // reassociate and
5874 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5875 return RAND;
5877 // Try to convert a constant mask AND into a shuffle clear mask.
5878 if (VT.isVector())
5879 if (SDValue Shuffle = XformToShuffleWithZero(N))
5880 return Shuffle;
5882 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5883 return Combined;
5885 // fold (and (or x, C), D) -> D if (C & D) == D
5886 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5887 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5889 if (N0.getOpcode() == ISD::OR &&
5890 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5891 return N1;
5892 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5893 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5894 SDValue N0Op0 = N0.getOperand(0);
5895 APInt Mask = ~N1C->getAPIntValue();
5896 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5897 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5898 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5899 N0.getValueType(), N0Op0);
5901 // Replace uses of the AND with uses of the Zero extend node.
5902 CombineTo(N, Zext);
5904 // We actually want to replace all uses of the any_extend with the
5905 // zero_extend, to avoid duplicating things. This will later cause this
5906 // AND to be folded.
5907 CombineTo(N0.getNode(), Zext);
5908 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5912 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5913 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5914 // already be zero by virtue of the width of the base type of the load.
5916 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5917 // more cases.
5918 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5919 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5920 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5921 N0.getOperand(0).getResNo() == 0) ||
5922 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5923 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5924 N0 : N0.getOperand(0) );
5926 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5927 // This can be a pure constant or a vector splat, in which case we treat the
5928 // vector as a scalar and use the splat value.
5929 APInt Constant = APInt::getZero(1);
5930 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5931 Constant = C->getAPIntValue();
5932 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5933 APInt SplatValue, SplatUndef;
5934 unsigned SplatBitSize;
5935 bool HasAnyUndefs;
5936 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5937 SplatBitSize, HasAnyUndefs);
5938 if (IsSplat) {
5939 // Undef bits can contribute to a possible optimisation if set, so
5940 // set them.
5941 SplatValue |= SplatUndef;
5943 // The splat value may be something like "0x00FFFFFF", which means 0 for
5944 // the first vector value and FF for the rest, repeating. We need a mask
5945 // that will apply equally to all members of the vector, so AND all the
5946 // lanes of the constant together.
5947 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5949 // If the splat value has been compressed to a bitlength lower
5950 // than the size of the vector lane, we need to re-expand it to
5951 // the lane size.
5952 if (EltBitWidth > SplatBitSize)
5953 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5954 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5955 SplatValue |= SplatValue.shl(SplatBitSize);
5957 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5958 // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5959 if ((SplatBitSize % EltBitWidth) == 0) {
5960 Constant = APInt::getAllOnes(EltBitWidth);
5961 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5962 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5967 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5968 // actually legal and isn't going to get expanded, else this is a false
5969 // optimisation.
5970 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5971 Load->getValueType(0),
5972 Load->getMemoryVT());
5974 // Resize the constant to the same size as the original memory access before
5975 // extension. If it is still the AllOnesValue then this AND is completely
5976 // unneeded.
5977 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5979 bool B;
5980 switch (Load->getExtensionType()) {
5981 default: B = false; break;
5982 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5983 case ISD::ZEXTLOAD:
5984 case ISD::NON_EXTLOAD: B = true; break;
5987 if (B && Constant.isAllOnes()) {
5988 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5989 // preserve semantics once we get rid of the AND.
5990 SDValue NewLoad(Load, 0);
5992 // Fold the AND away. NewLoad may get replaced immediately.
5993 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5995 if (Load->getExtensionType() == ISD::EXTLOAD) {
5996 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5997 Load->getValueType(0), SDLoc(Load),
5998 Load->getChain(), Load->getBasePtr(),
5999 Load->getOffset(), Load->getMemoryVT(),
6000 Load->getMemOperand());
6001 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6002 if (Load->getNumValues() == 3) {
6003 // PRE/POST_INC loads have 3 values.
6004 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6005 NewLoad.getValue(2) };
6006 CombineTo(Load, To, 3, true);
6007 } else {
6008 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6012 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6016 // fold (and (masked_gather x)) -> (zext_masked_gather x)
6017 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6018 EVT MemVT = GN0->getMemoryVT();
6019 EVT ScalarVT = MemVT.getScalarType();
6021 if (SDValue(GN0, 0).hasOneUse() &&
6022 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6023 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
6024 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
6025 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
6027 SDValue ZExtLoad = DAG.getMaskedGather(
6028 DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6029 GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6031 CombineTo(N, ZExtLoad);
6032 AddToWorklist(ZExtLoad.getNode());
6033 // Avoid recheck of N.
6034 return SDValue(N, 0);
6038 // fold (and (load x), 255) -> (zextload x, i8)
6039 // fold (and (extload x, i16), 255) -> (zextload x, i8)
6040 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6041 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6042 (N0.getOpcode() == ISD::ANY_EXTEND &&
6043 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6044 if (SDValue Res = reduceLoadWidth(N)) {
6045 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6046 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6047 AddToWorklist(N);
6048 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6049 return SDValue(N, 0);
6053 if (LegalTypes) {
6054 // Attempt to propagate the AND back up to the leaves which, if they're
6055 // loads, can be combined to narrow loads and the AND node can be removed.
6056 // Perform after legalization so that extend nodes will already be
6057 // combined into the loads.
6058 if (BackwardsPropagateMask(N))
6059 return SDValue(N, 0);
6062 if (SDValue Combined = visitANDLike(N0, N1, N))
6063 return Combined;
6065 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
6066 if (N0.getOpcode() == N1.getOpcode())
6067 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6068 return V;
6070 // Masking the negated extension of a boolean is just the zero-extended
6071 // boolean:
6072 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6073 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6075 // Note: the SimplifyDemandedBits fold below can make an information-losing
6076 // transform, and then we have no way to find this better fold.
6077 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6078 if (isNullOrNullSplat(N0.getOperand(0))) {
6079 SDValue SubRHS = N0.getOperand(1);
6080 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6081 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6082 return SubRHS;
6083 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6084 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6085 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6089 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6090 // fold (and (sra)) -> (and (srl)) when possible.
6091 if (SimplifyDemandedBits(SDValue(N, 0)))
6092 return SDValue(N, 0);
6094 // fold (zext_inreg (extload x)) -> (zextload x)
6095 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6096 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6097 (ISD::isEXTLoad(N0.getNode()) ||
6098 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6099 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6100 EVT MemVT = LN0->getMemoryVT();
6101 // If we zero all the possible extended bits, then we can turn this into
6102 // a zextload if we are running before legalize or the operation is legal.
6103 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6104 unsigned MemBitSize = MemVT.getScalarSizeInBits();
6105 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6106 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6107 ((!LegalOperations && LN0->isSimple()) ||
6108 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6109 SDValue ExtLoad =
6110 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6111 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6112 AddToWorklist(N);
6113 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6114 return SDValue(N, 0); // Return N so it doesn't get rechecked!
6118 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6119 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6120 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6121 N0.getOperand(1), false))
6122 return BSwap;
6125 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6126 return Shifts;
6128 if (TLI.hasBitTest(N0, N1))
6129 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6130 return V;
6132 // Recognize the following pattern:
6134 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6136 // where bitmask is a mask that clears the upper bits of AndVT. The
6137 // number of bits in bitmask must be a power of two.
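// For example, (and (sign_extend i16 X to i32), 0xFFFF): the mask wipes out
// every bit produced by the sign extension, so the whole expression is just
// (zero_extend i16 X to i32).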
6138 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6139 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6140 return false;
6142 auto *C = dyn_cast<ConstantSDNode>(RHS);
6143 if (!C)
6144 return false;
6146 if (!C->getAPIntValue().isMask(
6147 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6148 return false;
6150 return true;
6153 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6154 if (IsAndZeroExtMask(N0, N1))
6155 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6157 if (hasOperation(ISD::USUBSAT, VT))
6158 if (SDValue V = foldAndToUsubsat(N, DAG))
6159 return V;
6161 return SDValue();
6164 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
6165 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6166 bool DemandHighBits) {
6167 if (!LegalOperations)
6168 return SDValue();
6170 EVT VT = N->getValueType(0);
6171 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6172 return SDValue();
6173 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6174 return SDValue();
6176 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6177 bool LookPassAnd0 = false;
6178 bool LookPassAnd1 = false;
6179 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6180 std::swap(N0, N1);
6181 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6182 std::swap(N0, N1);
6183 if (N0.getOpcode() == ISD::AND) {
6184 if (!N0.getNode()->hasOneUse())
6185 return SDValue();
6186 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6187 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6188 // This is needed for X86.
6189 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6190 N01C->getZExtValue() != 0xFFFF))
6191 return SDValue();
6192 N0 = N0.getOperand(0);
6193 LookPassAnd0 = true;
6196 if (N1.getOpcode() == ISD::AND) {
6197 if (!N1.getNode()->hasOneUse())
6198 return SDValue();
6199 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6200 if (!N11C || N11C->getZExtValue() != 0xFF)
6201 return SDValue();
6202 N1 = N1.getOperand(0);
6203 LookPassAnd1 = true;
6206 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6207 std::swap(N0, N1);
6208 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6209 return SDValue();
6210 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6211 return SDValue();
6213 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6214 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6215 if (!N01C || !N11C)
6216 return SDValue();
6217 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6218 return SDValue();
6220 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6221 SDValue N00 = N0->getOperand(0);
6222 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6223 if (!N00.getNode()->hasOneUse())
6224 return SDValue();
6225 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6226 if (!N001C || N001C->getZExtValue() != 0xFF)
6227 return SDValue();
6228 N00 = N00.getOperand(0);
6229 LookPassAnd0 = true;
6232 SDValue N10 = N1->getOperand(0);
6233 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6234 if (!N10.getNode()->hasOneUse())
6235 return SDValue();
6236 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6237 // Also allow 0xFFFF since the bits will be shifted out. This is needed
6238 // for X86.
6239 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6240 N101C->getZExtValue() != 0xFFFF))
6241 return SDValue();
6242 N10 = N10.getOperand(0);
6243 LookPassAnd1 = true;
6246 if (N00 != N10)
6247 return SDValue();
6249 // Make sure everything beyond the low halfword gets set to zero since the
6250 // SRL of the bswap result will clear the top bits.

6251 unsigned OpSizeInBits = VT.getSizeInBits();
6252 if (DemandHighBits && OpSizeInBits > 16) {
6253 // If the left-shift isn't masked out then the only way this is a bswap is
6254 // if all bits beyond the low 8 are 0. In that case the entire pattern
6255 // reduces to a left shift anyway: leave it for other parts of the combiner.
6256 if (!LookPassAnd0)
6257 return SDValue();
6259 // However, if the right shift isn't masked out then it might be because
6260 // it's not needed. See if we can spot that too.
6261 if (!LookPassAnd1 &&
6262 !DAG.MaskedValueIsZero(
6263 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6264 return SDValue();
6267 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6268 if (OpSizeInBits > 16) {
6269 SDLoc DL(N);
6270 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6271 DAG.getConstant(OpSizeInBits - 16, DL,
6272 getShiftAmountTy(VT)));
6274 return Res;
6277 /// Return true if the specified node is an element that makes up a 32-bit
6278 /// packed halfword byteswap.
6279 /// ((x & 0x000000ff) << 8) |
6280 /// ((x & 0x0000ff00) >> 8) |
6281 /// ((x & 0x00ff0000) << 8) |
6282 /// ((x & 0xff000000) >> 8)
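/// On success the node supplying the masked byte is recorded in
/// Parts[MaskByteOffset]; the caller later checks that all four entries name
/// the same source node.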
6283 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6284 if (!N.getNode()->hasOneUse())
6285 return false;
6287 unsigned Opc = N.getOpcode();
6288 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6289 return false;
6291 SDValue N0 = N.getOperand(0);
6292 unsigned Opc0 = N0.getOpcode();
6293 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6294 return false;
6296 ConstantSDNode *N1C = nullptr;
6297 // SHL or SRL: look upstream for AND mask operand
6298 if (Opc == ISD::AND)
6299 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6300 else if (Opc0 == ISD::AND)
6301 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6302 if (!N1C)
6303 return false;
6305 unsigned MaskByteOffset;
6306 switch (N1C->getZExtValue()) {
6307 default:
6308 return false;
6309 case 0xFF: MaskByteOffset = 0; break;
6310 case 0xFF00: MaskByteOffset = 1; break;
6311 case 0xFFFF:
6312 // In case demanded bits didn't clear the bits that will be shifted out.
6313 // This is needed for X86.
6314 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6315 MaskByteOffset = 1;
6316 break;
6318 return false;
6319 case 0xFF0000: MaskByteOffset = 2; break;
6320 case 0xFF000000: MaskByteOffset = 3; break;
6323 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6324 if (Opc == ISD::AND) {
6325 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6326 // (x >> 8) & 0xff
6327 // (x >> 8) & 0xff0000
6328 if (Opc0 != ISD::SRL)
6329 return false;
6330 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6331 if (!C || C->getZExtValue() != 8)
6332 return false;
6333 } else {
6334 // (x << 8) & 0xff00
6335 // (x << 8) & 0xff000000
6336 if (Opc0 != ISD::SHL)
6337 return false;
6338 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6339 if (!C || C->getZExtValue() != 8)
6340 return false;
6342 } else if (Opc == ISD::SHL) {
6343 // (x & 0xff) << 8
6344 // (x & 0xff0000) << 8
6345 if (MaskByteOffset != 0 && MaskByteOffset != 2)
6346 return false;
6347 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6348 if (!C || C->getZExtValue() != 8)
6349 return false;
6350 } else { // Opc == ISD::SRL
6351 // (x & 0xff00) >> 8
6352 // (x & 0xff000000) >> 8
6353 if (MaskByteOffset != 1 && MaskByteOffset != 3)
6354 return false;
6355 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6356 if (!C || C->getZExtValue() != 8)
6357 return false;
6360 if (Parts[MaskByteOffset])
6361 return false;
6363 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6364 return true;
6367 // Match 2 elements of a packed halfword bswap.
6368 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6369 if (N.getOpcode() == ISD::OR)
6370 return isBSwapHWordElement(N.getOperand(0), Parts) &&
6371 isBSwapHWordElement(N.getOperand(1), Parts);
6373 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6374 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6375 if (!C || C->getAPIntValue() != 16)
6376 return false;
6377 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6378 return true;
6381 return false;
6384 // Match this pattern:
6385 // (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6386 // And rewrite this to:
6387 // (rotr (bswap A), 16)
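// For example, with A = 0x11223344 the two AND terms are 0x22004400 and
// 0x00110033, whose OR is 0x22114433; bswap(A) is 0x44332211 and rotating
// that right by 16 also gives 0x22114433.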
6388 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6389 SelectionDAG &DAG, SDNode *N, SDValue N0,
6390 SDValue N1, EVT VT, EVT ShiftAmountTy) {
6391 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6392 "MatchBSwapHWordOrAndAnd: expecting i32");
6393 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6394 return SDValue();
6395 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6396 return SDValue();
6397 // TODO: this is too restrictive; lifting this restriction requires more tests
6398 if (!N0->hasOneUse() || !N1->hasOneUse())
6399 return SDValue();
6400 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6401 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6402 if (!Mask0 || !Mask1)
6403 return SDValue();
6404 if (Mask0->getAPIntValue() != 0xff00ff00 ||
6405 Mask1->getAPIntValue() != 0x00ff00ff)
6406 return SDValue();
6407 SDValue Shift0 = N0.getOperand(0);
6408 SDValue Shift1 = N1.getOperand(0);
6409 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6410 return SDValue();
6411 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6412 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6413 if (!ShiftAmt0 || !ShiftAmt1)
6414 return SDValue();
6415 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6416 return SDValue();
6417 if (Shift0.getOperand(0) != Shift1.getOperand(0))
6418 return SDValue();
6420 SDLoc DL(N);
6421 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6422 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6423 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6426 /// Match a 32-bit packed halfword bswap. That is
6427 /// ((x & 0x000000ff) << 8) |
6428 /// ((x & 0x0000ff00) >> 8) |
6429 /// ((x & 0x00ff0000) << 8) |
6430 /// ((x & 0xff000000) >> 8)
6431 /// => (rotl (bswap x), 16)
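/// For example, x = 0xAABBCCDD produces 0xBBAADDCC, which is exactly
/// rotl(bswap(x) = 0xDDCCBBAA, 16).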
6432 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6433 if (!LegalOperations)
6434 return SDValue();
6436 EVT VT = N->getValueType(0);
6437 if (VT != MVT::i32)
6438 return SDValue();
6439 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6440 return SDValue();
6442 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6443 getShiftAmountTy(VT)))
6444 return BSwap;
6446 // Try again with commuted operands.
6447 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6448 getShiftAmountTy(VT)))
6449 return BSwap;
6452 // Look for either
6453 // (or (bswaphpair), (bswaphpair))
6454 // (or (or (bswaphpair), (and)), (and))
6455 // (or (or (and), (bswaphpair)), (and))
6456 SDNode *Parts[4] = {};
6458 if (isBSwapHWordPair(N0, Parts)) {
6459 // (or (or (and), (and)), (or (and), (and)))
6460 if (!isBSwapHWordPair(N1, Parts))
6461 return SDValue();
6462 } else if (N0.getOpcode() == ISD::OR) {
6463 // (or (or (or (and), (and)), (and)), (and))
6464 if (!isBSwapHWordElement(N1, Parts))
6465 return SDValue();
6466 SDValue N00 = N0.getOperand(0);
6467 SDValue N01 = N0.getOperand(1);
6468 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6469 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6470 return SDValue();
6471 } else
6472 return SDValue();
6474 // Make sure the parts are all coming from the same node.
6475 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6476 return SDValue();
6478 SDLoc DL(N);
6479 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6480 SDValue(Parts[0], 0));
6482 // Result of the bswap should be rotated by 16. If it's not legal, then
6483 // do (x << 16) | (x >> 16).
6484 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6485 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6486 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6487 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6488 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6489 return DAG.getNode(ISD::OR, DL, VT,
6490 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6491 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6494 /// This contains all DAGCombine rules which reduce two values combined by
6495 /// an Or operation to a single value \see visitANDLike().
6496 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6497 EVT VT = N1.getValueType();
6498 SDLoc DL(N);
6500 // fold (or x, undef) -> -1
6501 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6502 return DAG.getAllOnesConstant(DL, VT);
6504 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6505 return V;
6507 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
6508 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6509 // Don't increase # computations.
6510 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6511 // We can only do this xform if we know that bits from X that are set in C2
6512 // but not in C1 are already zero. Likewise for Y.
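// For example, (or (and X, 0xF0), (and Y, 0x0F)) becomes
// (and (or X, Y), 0xFF) when bits 0-3 of X and bits 4-7 of Y are known zero.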
6513 if (const ConstantSDNode *N0O1C =
6514 getAsNonOpaqueConstant(N0.getOperand(1))) {
6515 if (const ConstantSDNode *N1O1C =
6516 getAsNonOpaqueConstant(N1.getOperand(1))) {
6517 // We can only do this xform if we know that bits from X that are set in
6518 // C2 but not in C1 are already zero. Likewise for Y.
6519 const APInt &LHSMask = N0O1C->getAPIntValue();
6520 const APInt &RHSMask = N1O1C->getAPIntValue();
6522 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6523 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6524 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6525 N0.getOperand(0), N1.getOperand(0));
6526 return DAG.getNode(ISD::AND, DL, VT, X,
6527 DAG.getConstant(LHSMask | RHSMask, DL, VT));
6533 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6534 if (N0.getOpcode() == ISD::AND &&
6535 N1.getOpcode() == ISD::AND &&
6536 N0.getOperand(0) == N1.getOperand(0) &&
6537 // Don't increase # computations.
6538 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6539 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6540 N0.getOperand(1), N1.getOperand(1));
6541 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6544 return SDValue();
6547 /// OR combines for which the commuted variant will be tried as well.
6548 static SDValue visitORCommutative(
6549 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6550 EVT VT = N0.getValueType();
6551 if (N0.getOpcode() == ISD::AND) {
6552 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6553 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6554 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6556 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6557 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6558 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6561 return SDValue();
6564 SDValue DAGCombiner::visitOR(SDNode *N) {
6565 SDValue N0 = N->getOperand(0);
6566 SDValue N1 = N->getOperand(1);
6567 EVT VT = N1.getValueType();
6569 // x | x --> x
6570 if (N0 == N1)
6571 return N0;
6573 // fold (or c1, c2) -> c1|c2
6574 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6575 return C;
6577 // canonicalize constant to RHS
6578 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6579 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6580 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6582 // fold vector ops
6583 if (VT.isVector()) {
6584 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6585 return FoldedVOp;
6587 // fold (or x, 0) -> x, vector edition
6588 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6589 return N0;
6591 // fold (or x, -1) -> -1, vector edition
6592 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6594 // do not return N1, because an undef node may exist in N1
6594 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6596 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6597 // Do this only if the resulting shuffle is legal.
6598 if (isa<ShuffleVectorSDNode>(N0) &&
6599 isa<ShuffleVectorSDNode>(N1) &&
6600 // Avoid folding a node with illegal type.
6601 TLI.isTypeLegal(VT)) {
6602 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6603 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6604 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6605 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6606 // Ensure both shuffles have a zero input.
6607 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6608 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6609 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6610 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6611 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6612 bool CanFold = true;
6613 int NumElts = VT.getVectorNumElements();
6614 SmallVector<int, 4> Mask(NumElts);
6616 for (int i = 0; i != NumElts; ++i) {
6617 int M0 = SV0->getMaskElt(i);
6618 int M1 = SV1->getMaskElt(i);
6620 // Determine if either index is pointing to a zero vector.
6621 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6622 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6624 // If one element is zero and the other side is undef, keep undef.
6625 // This also handles the case that both are undef.
6626 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6627 Mask[i] = -1;
6628 continue;
6631 // Make sure only one of the elements is zero.
6632 if (M0Zero == M1Zero) {
6633 CanFold = false;
6634 break;
6637 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6639 // We have a zero and non-zero element. If the non-zero came from
6640 // SV0 make the index a LHS index. If it came from SV1, make it
6641 // a RHS index. We need to mod by NumElts because we don't care
6642 // which operand it came from in the original shuffles.
6643 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6646 if (CanFold) {
6647 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6648 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6650 SDValue LegalShuffle =
6651 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6652 Mask, DAG);
6653 if (LegalShuffle)
6654 return LegalShuffle;
6660 // fold (or x, 0) -> x
6661 if (isNullConstant(N1))
6662 return N0;
6664 // fold (or x, -1) -> -1
6665 if (isAllOnesConstant(N1))
6666 return N1;
6668 if (SDValue NewSel = foldBinOpIntoSelect(N))
6669 return NewSel;
6671 // fold (or x, c) -> c iff (x & ~c) == 0
6672 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6673 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6674 return N1;
6676 if (SDValue Combined = visitORLike(N0, N1, N))
6677 return Combined;
6679 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6680 return Combined;
6682 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
6683 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6684 return BSwap;
6685 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6686 return BSwap;
6688 // reassociate or
6689 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6690 return ROR;
6692 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6693 // iff (c1 & c2) != 0 or c1/c2 are undef.
6694 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6695 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6697 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6698 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6699 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6700 {N1, N0.getOperand(1)})) {
6701 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6702 AddToWorklist(IOR.getNode());
6703 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6707 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6708 return Combined;
6709 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6710 return Combined;
6712 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6713 if (N0.getOpcode() == N1.getOpcode())
6714 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6715 return V;
6717 // See if this is some rotate idiom.
6718 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6719 return Rot;
6721 if (SDValue Load = MatchLoadCombine(N))
6722 return Load;
6724 // Simplify the operands using demanded-bits information.
6725 if (SimplifyDemandedBits(SDValue(N, 0)))
6726 return SDValue(N, 0);
6728 // If OR can be rewritten into ADD, try combines based on ADD.
6729 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6730 DAG.haveNoCommonBitsSet(N0, N1))
6731 if (SDValue Combined = visitADDLike(N))
6732 return Combined;
6734 return SDValue();
6737 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6738 if (Op.getOpcode() == ISD::AND &&
6739 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6740 Mask = Op.getOperand(1);
6741 return Op.getOperand(0);
6743 return Op;
6746 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6747 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6748 SDValue &Mask) {
6749 Op = stripConstantMask(DAG, Op, Mask);
6750 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6751 Shift = Op;
6752 return true;
6754 return false;
6757 /// Helper function for visitOR to extract the needed side of a rotate idiom
6758 /// from a shl/srl/mul/udiv. This is meant to handle cases where
6759 /// InstCombine merged some outside op with one of the shifts from
6760 /// the rotate pattern.
6761 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6762 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6763 /// patterns:
6765 /// (or (add v v) (srl v bitwidth-1)):
6766 /// expands (add v v) -> (shl v 1)
6768 /// (or (mul v c0) (srl (mul v c1) c2)):
6769 /// expands (mul v c0) -> (shl (mul v c1) c3)
6771 /// (or (udiv v c0) (shl (udiv v c1) c2)):
6772 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
6774 /// (or (shl v c0) (srl (shl v c1) c2)):
6775 /// expands (shl v c0) -> (shl (shl v c1) c3)
6777 /// (or (srl v c0) (shl (srl v c1) c2)):
6778 /// expands (srl v c0) -> (srl (srl v c1) c3)
6780 /// Such that in all cases, c3+c2==bitwidth(op v c1).
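/// For example, for i32: (or (mul v, 8), (srl (mul v, 2), 30)) expands
/// (mul v, 8) into (shl (mul v, 2), 2), since v*8 == (v*2)<<2 and 2+30 == 32,
/// which lets the OR be matched as a rotate of (mul v, 2).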
6781 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6782 SDValue ExtractFrom, SDValue &Mask,
6783 const SDLoc &DL) {
6784 assert(OppShift && ExtractFrom && "Empty SDValue");
6785 assert(
6786 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6787 "Existing shift must be valid as a rotate half");
6789 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6791 // Value and Type of the shift.
6792 SDValue OppShiftLHS = OppShift.getOperand(0);
6793 EVT ShiftedVT = OppShiftLHS.getValueType();
6795 // Amount of the existing shift.
6796 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6798 // (add v v) -> (shl v 1)
6799 // TODO: Should this be a general DAG canonicalization?
6800 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6801 ExtractFrom.getOpcode() == ISD::ADD &&
6802 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6803 ExtractFrom.getOperand(0) == OppShiftLHS &&
6804 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6805 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6806 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6808 // Preconditions:
6809 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6811 // Find opcode of the needed shift to be extracted from (op0 v c0).
6812 unsigned Opcode = ISD::DELETED_NODE;
6813 bool IsMulOrDiv = false;
6814 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6815 // opcode or its arithmetic (mul or udiv) variant.
6816 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6817 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6818 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6819 return false;
6820 Opcode = NeededShift;
6821 return true;
6823 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6824 // that the needed shift can be extracted from.
6825 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6826 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6827 return SDValue();
6829 // op0 must be the same opcode on both sides, have the same LHS argument,
6830 // and produce the same value type.
6831 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6832 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6833 ShiftedVT != ExtractFrom.getValueType())
6834 return SDValue();
6836 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6837 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6838 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6839 ConstantSDNode *ExtractFromCst =
6840 isConstOrConstSplat(ExtractFrom.getOperand(1));
6841 // TODO: We should be able to handle non-uniform constant vectors for these values
6842 // Check that we have constant values.
6843 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6844 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6845 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6846 return SDValue();
6848 // Compute the shift amount we need to extract to complete the rotate.
6849 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6850 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6851 return SDValue();
6852 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6853 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6854 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6855 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6856 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6858 // Now try extract the needed shift from the ExtractFrom op and see if the
6859 // result matches up with the existing shift's LHS op.
6860 if (IsMulOrDiv) {
6861 // Op to extract from is a mul or udiv by a constant.
6862 // Check:
6863 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6864 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6865 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6866 NeededShiftAmt.getZExtValue());
6867 APInt ResultAmt;
6868 APInt Rem;
6869 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6870 if (Rem != 0 || ResultAmt != OppLHSAmt)
6871 return SDValue();
6872 } else {
6873 // Op to extract from is a shift by a constant.
6874 // Check:
6875 // c2 - (bitwidth(op0 v c0) - c1) == c0
6876 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6877 ExtractFromAmt.getBitWidth()))
6878 return SDValue();
6881 // Return the expanded shift op that should allow a rotate to be formed.
6882 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6883 EVT ResVT = ExtractFrom.getValueType();
6884 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6885 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6888 // Return true if we can prove that, whenever Neg and Pos are both in the
6889 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6890 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6892 // (or (shift1 X, Neg), (shift2 X, Pos))
6894 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6895 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
6896 // to consider shift amounts with defined behavior.
6898 // The IsRotate flag should be set when the LHS of both shifts is the same.
6899 // Otherwise if matching a general funnel shift, it should be clear.
6900 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6901 SelectionDAG &DAG, bool IsRotate) {
6902 // If EltSize is a power of 2 then:
6904 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6905 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6907 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6908 // for the stronger condition:
6910 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6912 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6913 // we can just replace Neg with Neg' for the rest of the function.
6915 // In other cases we check for the even stronger condition:
6917 // Neg == EltSize - Pos [B]
6919 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6920 // behavior if Pos == 0 (and consequently Neg == EltSize).
6922 // We could actually use [A] whenever EltSize is a power of 2, but the
6923 // only extra cases that it would match are those uninteresting ones
6924 // where Neg and Pos are never in range at the same time. E.g. for
6925 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6926 // as well as (sub 32, Pos), but:
6928 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6930 // always invokes undefined behavior for 32-bit X.
6932 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6934 // NOTE: We can only do this when matching an AND and not a general
6935 // funnel shift.
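// For example, for EltSize == 32, Neg == (and (sub 0, Pos), 31) satisfies
// condition [A] because (0 - Pos) & 31 == (32 - Pos) & 31 for every Pos.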
6936 unsigned MaskLoBits = 0;
6937 if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6938 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6939 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6940 unsigned Bits = Log2_64(EltSize);
6941 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6942 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6943 Neg = Neg.getOperand(0);
6944 MaskLoBits = Bits;
6949 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6950 if (Neg.getOpcode() != ISD::SUB)
6951 return false;
6952 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6953 if (!NegC)
6954 return false;
6955 SDValue NegOp1 = Neg.getOperand(1);
6957 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6958 // Pos'. The truncation is redundant for the purpose of the equality.
6959 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6960 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6961 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6962 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6963 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6964 MaskLoBits))
6965 Pos = Pos.getOperand(0);
6969 // The condition we need is now:
6971 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6973 // If NegOp1 == Pos then we need:
6975 // EltSize & Mask == NegC & Mask
6977 // (because "x & Mask" is a truncation and distributes through subtraction).
6979 // We also need to account for a potential truncation of NegOp1 if the amount
6980 // has already been legalized to a shift amount type.
6981 APInt Width;
6982 if ((Pos == NegOp1) ||
6983 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6984 Width = NegC->getAPIntValue();
6986 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6987 // Then the condition we want to prove becomes:
6989 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6991 // which, again because "x & Mask" is a truncation, becomes:
6993 // NegC & Mask == (EltSize - PosC) & Mask
6994 // EltSize & Mask == (NegC + PosC) & Mask
6995 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6996 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6997 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6998 else
6999 return false;
7000 } else
7001 return false;
7003 // Now we just need to check that EltSize & Mask == Width & Mask.
7004 if (MaskLoBits)
7005 // EltSize & Mask is 0 since Mask is EltSize - 1.
7006 return Width.getLoBits(MaskLoBits) == 0;
7007 return Width == EltSize;
7010 // A subroutine of MatchRotate used once we have found an OR of two opposite
7011 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
7012 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7013 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7014 // Neg with outer conversions stripped away.
7015 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7016 SDValue Neg, SDValue InnerPos,
7017 SDValue InnerNeg, unsigned PosOpcode,
7018 unsigned NegOpcode, const SDLoc &DL) {
7019 // fold (or (shl x, (*ext y)),
7020 // (srl x, (*ext (sub 32, y)))) ->
7021 // (rotl x, y) or (rotr x, (sub 32, y))
7023 // fold (or (shl x, (*ext (sub 32, y))),
7024 // (srl x, (*ext y))) ->
7025 // (rotr x, y) or (rotl x, (sub 32, y))
7026 EVT VT = Shifted.getValueType();
7027 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7028 /*IsRotate*/ true)) {
7029 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7030 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7031 HasPos ? Pos : Neg);
7034 return SDValue();
7037 // A subroutine of MatchRotate used once we have found an OR of two opposite
7038 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
7039 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7040 // former being preferred if supported. InnerPos and InnerNeg are Pos and
7041 // Neg with outer conversions stripped away.
7042 // TODO: Merge with MatchRotatePosNeg.
7043 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7044 SDValue Neg, SDValue InnerPos,
7045 SDValue InnerNeg, unsigned PosOpcode,
7046 unsigned NegOpcode, const SDLoc &DL) {
7047 EVT VT = N0.getValueType();
7048 unsigned EltBits = VT.getScalarSizeInBits();
7050 // fold (or (shl x0, (*ext y)),
7051 // (srl x1, (*ext (sub 32, y)))) ->
7052 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7054 // fold (or (shl x0, (*ext (sub 32, y))),
7055 // (srl x1, (*ext y))) ->
7056 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7057 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7058 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
7059 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7060 HasPos ? Pos : Neg);
7063 // When matching the shift+xor cases we can't easily use the xor'd shift
7064 // amount, so for now just use the PosOpcode case if it's legal.
7065 // TODO: When can we use the NegOpcode case?
7066 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7067 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7068 if (Op.getOpcode() != BinOpc)
7069 return false;
7070 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7071 return Cst && (Cst->getAPIntValue() == Imm);
7074 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7075 // -> (fshl x0, x1, y)
7076 if (IsBinOpImm(N1, ISD::SRL, 1) &&
7077 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7078 InnerPos == InnerNeg.getOperand(0) &&
7079 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7080 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7083 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7084 // -> (fshr x0, x1, y)
7085 if (IsBinOpImm(N0, ISD::SHL, 1) &&
7086 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7087 InnerNeg == InnerPos.getOperand(0) &&
7088 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7089 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7092 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7093 // -> (fshr x0, x1, y)
7094 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7095 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7096 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7097 InnerNeg == InnerPos.getOperand(0) &&
7098 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7099 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7103 return SDValue();
7106 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
7107 // idioms for rotate, and if the target supports rotation instructions, generate
7108 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7109 // with different shifted sources.
7110 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7111 EVT VT = LHS.getValueType();
7113 // The target must have at least one rotate/funnel flavor.
7114 // We still try to match rotate by constant pre-legalization.
7115 // TODO: Support pre-legalization funnel-shift by constant.
7116 bool HasROTL = hasOperation(ISD::ROTL, VT);
7117 bool HasROTR = hasOperation(ISD::ROTR, VT);
7118 bool HasFSHL = hasOperation(ISD::FSHL, VT);
7119 bool HasFSHR = hasOperation(ISD::FSHR, VT);
7120 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7121 return SDValue();
7123 // Check for truncated rotate.
7124 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7125 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7126 assert(LHS.getValueType() == RHS.getValueType());
7127 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7128 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7132 // Match "(X shl/srl V1) & V2" where V2 may not be present.
7133 SDValue LHSShift; // The shift.
7134 SDValue LHSMask; // AND value if any.
7135 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7137 SDValue RHSShift; // The shift.
7138 SDValue RHSMask; // AND value if any.
7139 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7141 // If neither side matched a rotate half, bail
7142 if (!LHSShift && !RHSShift)
7143 return SDValue();
7145 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7146 // side of the rotate, so try to handle that here. In all cases we need to
7147 // pass the matched shift from the opposite side to compute the opcode and
7148 // needed shift amount to extract. We still want to do this if both sides
7149 // matched a rotate half because one half may be a potential overshift that
7150 // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
7151 // single one).
7153 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7154 if (LHSShift)
7155 if (SDValue NewRHSShift =
7156 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7157 RHSShift = NewRHSShift;
7158 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7159 if (RHSShift)
7160 if (SDValue NewLHSShift =
7161 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7162 LHSShift = NewLHSShift;
7164 // If a side is still missing, nothing else we can do.
7165 if (!RHSShift || !LHSShift)
7166 return SDValue();
7168 // At this point we've matched or extracted a shift op on each side.
7170 if (LHSShift.getOpcode() == RHSShift.getOpcode())
7171 return SDValue(); // Shifts must disagree.
7173 // TODO: Support pre-legalization funnel-shift by constant.
7174 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7175 if (!IsRotate && !(HasFSHL || HasFSHR))
7176 return SDValue(); // Requires funnel shift support.
7178 // Canonicalize shl to left side in a shl/srl pair.
7179 if (RHSShift.getOpcode() == ISD::SHL) {
7180 std::swap(LHS, RHS);
7181 std::swap(LHSShift, RHSShift);
7182 std::swap(LHSMask, RHSMask);
7185 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7186 SDValue LHSShiftArg = LHSShift.getOperand(0);
7187 SDValue LHSShiftAmt = LHSShift.getOperand(1);
7188 SDValue RHSShiftArg = RHSShift.getOperand(0);
7189 SDValue RHSShiftAmt = RHSShift.getOperand(1);
7191 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7192 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7193 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7194 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7195 // iff C1+C2 == EltSizeInBits
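// For example, for i32: (or (shl x, 8), (srl x, 24)) -> (rotl x, 8),
// since 8 + 24 == 32.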
7196 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7197 ConstantSDNode *RHS) {
7198 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7200 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7201 SDValue Res;
7202 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7203 bool UseROTL = !LegalOperations || HasROTL;
7204 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7205 UseROTL ? LHSShiftAmt : RHSShiftAmt);
7206 } else {
7207 bool UseFSHL = !LegalOperations || HasFSHL;
7208 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7209 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7212 // If there is an AND of either shifted operand, apply it to the result.
7213 if (LHSMask.getNode() || RHSMask.getNode()) {
7214 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7215 SDValue Mask = AllOnes;
7217 if (LHSMask.getNode()) {
7218 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7219 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7220 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7222 if (RHSMask.getNode()) {
7223 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7224 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7225 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7228 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7231 return Res;
7234 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7235 // shift amount.
7236 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7237 return SDValue();
7239 // If there is a mask here, and we have a variable shift, we can't be sure
7240 // that we're masking out the right stuff.
7241 if (LHSMask.getNode() || RHSMask.getNode())
7242 return SDValue();
7244 // If the shift amount is sign/zext/any-extended just peel it off.
7245 SDValue LExtOp0 = LHSShiftAmt;
7246 SDValue RExtOp0 = RHSShiftAmt;
7247 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7248 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7249 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7250 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7251 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7252 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7253 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7254 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7255 LExtOp0 = LHSShiftAmt.getOperand(0);
7256 RExtOp0 = RHSShiftAmt.getOperand(0);
7259 if (IsRotate && (HasROTL || HasROTR)) {
7260 SDValue TryL =
7261 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7262 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7263 if (TryL)
7264 return TryL;
7266 SDValue TryR =
7267 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7268 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7269 if (TryR)
7270 return TryR;
7273 SDValue TryL =
7274 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7275 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7276 if (TryL)
7277 return TryL;
7279 SDValue TryR =
7280 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7281 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7282 if (TryR)
7283 return TryR;
7285 return SDValue();
7288 namespace {
7292 /// Represents the known origin of an individual byte in a load combine pattern. The
7291 /// value of the byte is either constant zero or comes from memory.
7292 struct ByteProvider {
7293 // For constant zero providers Load is set to nullptr. For memory providers
7294 // Load represents the node which loads the byte from memory.
7295 // ByteOffset is the offset of the byte in the value produced by the load.
7296 LoadSDNode *Load = nullptr;
7297 unsigned ByteOffset = 0;
7299 ByteProvider() = default;
7301 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7302 return ByteProvider(Load, ByteOffset);
7305 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7307 bool isConstantZero() const { return !Load; }
7308 bool isMemory() const { return Load; }
7310 bool operator==(const ByteProvider &Other) const {
7311 return Other.Load == Load && Other.ByteOffset == ByteOffset;
7314 private:
7315 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7316 : Load(Load), ByteOffset(ByteOffset) {}
7319 } // end anonymous namespace
7321 /// Recursively traverses the expression calculating the origin of the requested
7322 /// byte of the given value. Returns None if the provider can't be calculated.
7324 /// For every value except the root of the expression, verifies that the value
7325 /// has exactly one use; if not, returns None. This way, if the origin of the
7326 /// byte is returned, it is guaranteed that the values which contribute to the
7327 /// byte are not used outside of this expression.
7329 /// Because the parts of the expression are not allowed to have more than one
7330 /// use this function iterates over trees, not DAGs. So it never visits the same
7331 /// node more than once.
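/// For example, for i32 (or (zero_extend (load i8 p)), (shl (zero_extend
/// (load i8 q)), 8)), byte 0 is provided by the load from p, byte 1 by the
/// load from q, and bytes 2 and 3 are constant zero from the zero_extends.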
7332 static const Optional<ByteProvider>
7333 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7334 bool Root = false) {
7335 // A typical i64-by-i8 pattern requires recursion up to 8 calls deep
7336 if (Depth == 10)
7337 return None;
7339 if (!Root && !Op.hasOneUse())
7340 return None;
7342 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7343 unsigned BitWidth = Op.getValueSizeInBits();
7344 if (BitWidth % 8 != 0)
7345 return None;
7346 unsigned ByteWidth = BitWidth / 8;
7347 assert(Index < ByteWidth && "invalid index requested");
7348 (void) ByteWidth;
7350 switch (Op.getOpcode()) {
7351 case ISD::OR: {
7352 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7353 if (!LHS)
7354 return None;
7355 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7356 if (!RHS)
7357 return None;
7359 if (LHS->isConstantZero())
7360 return RHS;
7361 if (RHS->isConstantZero())
7362 return LHS;
7363 return None;
7365 case ISD::SHL: {
7366 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7367 if (!ShiftOp)
7368 return None;
7370 uint64_t BitShift = ShiftOp->getZExtValue();
7371 if (BitShift % 8 != 0)
7372 return None;
7373 uint64_t ByteShift = BitShift / 8;
7375 return Index < ByteShift
7376 ? ByteProvider::getConstantZero()
7377 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7378 Depth + 1);
7380 case ISD::ANY_EXTEND:
7381 case ISD::SIGN_EXTEND:
7382 case ISD::ZERO_EXTEND: {
7383 SDValue NarrowOp = Op->getOperand(0);
7384 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7385 if (NarrowBitWidth % 8 != 0)
7386 return None;
7387 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7389 if (Index >= NarrowByteWidth)
7390 return Op.getOpcode() == ISD::ZERO_EXTEND
7391 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7392 : None;
7393 return calculateByteProvider(NarrowOp, Index, Depth + 1);
7395 case ISD::BSWAP:
7396 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7397 Depth + 1);
7398 case ISD::LOAD: {
7399 auto L = cast<LoadSDNode>(Op.getNode());
7400 if (!L->isSimple() || L->isIndexed())
7401 return None;
7403 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7404 if (NarrowBitWidth % 8 != 0)
7405 return None;
7406 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7408 if (Index >= NarrowByteWidth)
7409 return L->getExtensionType() == ISD::ZEXTLOAD
7410 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7411 : None;
7412 return ByteProvider::getMemory(L, Index);
7416 return None;
7419 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7420 return i;
7423 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7424 return BW - i - 1;
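// With these helpers, byte index 1 of a 4-byte value maps to memory offset 1
// in a little-endian layout and to offset 2 in a big-endian layout.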
7427 // Check if the byte offsets we are looking at match either a big-endian or a
7428 // little-endian load of the value. Return true for big endian, false for
7429 // little endian, and None if the match failed.
7430 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7431 int64_t FirstOffset) {
7432 // Endianness can only be decided when there are at least 2 bytes.
7433 unsigned Width = ByteOffsets.size();
7434 if (Width < 2)
7435 return None;
7437 bool BigEndian = true, LittleEndian = true;
7438 for (unsigned i = 0; i < Width; i++) {
7439 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7440 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7441 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7442 if (!BigEndian && !LittleEndian)
7443 return None;
7446 assert((BigEndian != LittleEndian) && "It should be either big endian or "
7447 "little endian");
7448 return BigEndian;
7451 static SDValue stripTruncAndExt(SDValue Value) {
7452 switch (Value.getOpcode()) {
7453 case ISD::TRUNCATE:
7454 case ISD::ZERO_EXTEND:
7455 case ISD::SIGN_EXTEND:
7456 case ISD::ANY_EXTEND:
7457 return stripTruncAndExt(Value.getOperand(0));
7459 return Value;
7462 /// Match a pattern where a wide type scalar value is stored by several narrow
7463 /// stores. Fold it into a single store or a BSWAP and a store if the target
7464 /// supports it.
7466 /// Assuming little endian target:
7467 /// i8 *p = ...
7468 /// i32 val = ...
7469 /// p[0] = (val >> 0) & 0xFF;
7470 /// p[1] = (val >> 8) & 0xFF;
7471 /// p[2] = (val >> 16) & 0xFF;
7472 /// p[3] = (val >> 24) & 0xFF;
7473 /// =>
7474 /// *((i32)p) = val;
7476 /// i8 *p = ...
7477 /// i32 val = ...
7478 /// p[0] = (val >> 24) & 0xFF;
7479 /// p[1] = (val >> 16) & 0xFF;
7480 /// p[2] = (val >> 8) & 0xFF;
7481 /// p[3] = (val >> 0) & 0xFF;
7482 /// =>
7483 /// *((i32)p) = BSWAP(val);
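/// A two-part store with the halves swapped is also recognized; in that case
/// the value is rotated by half its width before the single wide store.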
7484 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7485 // The matching looks for "store (trunc x)" patterns that appear early but are
7486 // likely to be replaced by truncating store nodes during combining.
7487 // TODO: If there is evidence that running this later would help, this
7488 // limitation could be removed. Legality checks may need to be added
7489 // for the created store and optional bswap/rotate.
7490 if (LegalOperations || OptLevel == CodeGenOpt::None)
7491 return SDValue();
7493 // We only handle merging simple stores of 1-4 bytes.
7494 // TODO: Allow unordered atomics when wider type is legal (see D66309)
7495 EVT MemVT = N->getMemoryVT();
7496 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7497 !N->isSimple() || N->isIndexed())
7498 return SDValue();
7500 // Collect all of the stores in the chain.
7501 SDValue Chain = N->getChain();
7502 SmallVector<StoreSDNode *, 8> Stores = {N};
7503 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7504 // All stores must be the same size to ensure that we are writing all of the
7505 // bytes in the wide value.
7506 // TODO: We could allow multiple sizes by tracking each stored byte.
7507 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7508 Store->isIndexed())
7509 return SDValue();
7510 Stores.push_back(Store);
7511 Chain = Store->getChain();
7513 // There is no reason to continue if we do not have at least a pair of stores.
7514 if (Stores.size() < 2)
7515 return SDValue();
7517 // Handle simple types only.
7518 LLVMContext &Context = *DAG.getContext();
7519 unsigned NumStores = Stores.size();
7520 unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7521 unsigned WideNumBits = NumStores * NarrowNumBits;
7522 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7523 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7524 return SDValue();
7526 // Check if all bytes of the source value that we are looking at are stored
7527 // to the same base address. Collect offsets from Base address into OffsetMap.
7528 SDValue SourceValue;
7529 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7530 int64_t FirstOffset = INT64_MAX;
7531 StoreSDNode *FirstStore = nullptr;
7532 Optional<BaseIndexOffset> Base;
7533 for (auto Store : Stores) {
7534 // All the stores store different parts of the wide source value. A truncate
7535 // is required to get the partial value.
7536 SDValue Trunc = Store->getValue();
7537 if (Trunc.getOpcode() != ISD::TRUNCATE)
7538 return SDValue();
7539 // Other than the first/last part, a shift operation is required to get the
7540 // offset.
7541 int64_t Offset = 0;
7542 SDValue WideVal = Trunc.getOperand(0);
7543 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7544 isa<ConstantSDNode>(WideVal.getOperand(1))) {
7545 // The shift amount must be a constant multiple of the narrow type.
7546 // It is translated to the offset address in the wide source value "y".
7548 // x = srl y, ShiftAmtC
7549 // i8 z = trunc x
7550 // store z, ...
7551 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7552 if (ShiftAmtC % NarrowNumBits != 0)
7553 return SDValue();
7555 Offset = ShiftAmtC / NarrowNumBits;
7556 WideVal = WideVal.getOperand(0);
7559 // Stores must share the same source value with different offsets.
7560 // Truncate and extends should be stripped to get the single source value.
7561 if (!SourceValue)
7562 SourceValue = WideVal;
7563 else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7564 return SDValue();
7565 else if (SourceValue.getValueType() != WideVT) {
7566 if (WideVal.getValueType() == WideVT ||
7567 WideVal.getScalarValueSizeInBits() >
7568 SourceValue.getScalarValueSizeInBits())
7569 SourceValue = WideVal;
7570 // Give up if the source value type is smaller than the store size.
7571 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7572 return SDValue();
7575 // Stores must share the same base address.
7576 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7577 int64_t ByteOffsetFromBase = 0;
7578 if (!Base)
7579 Base = Ptr;
7580 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7581 return SDValue();
7583 // Remember the first store.
7584 if (ByteOffsetFromBase < FirstOffset) {
7585 FirstStore = Store;
7586 FirstOffset = ByteOffsetFromBase;
7588 // Map the offset in the store and the offset in the combined value, and
7589 // early return if it has been set before.
7590 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7591 return SDValue();
7592 OffsetMap[Offset] = ByteOffsetFromBase;
7595 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7596 assert(FirstStore && "First store must be set");
7598 // Check that a store of the wide type is both allowed and fast on the target
7599 const DataLayout &Layout = DAG.getDataLayout();
7600 bool Fast = false;
7601 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7602 *FirstStore->getMemOperand(), &Fast);
7603 if (!Allowed || !Fast)
7604 return SDValue();
7606 // Check if the pieces of the value are going to the expected places in memory
7607 // to merge the stores.
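// For example, four i8 pieces of an i32 on a little-endian target must land at
// offsets 0, 1, 2 and 3 from the first store for pieces 0 through 3.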
7608 auto checkOffsets = [&](bool MatchLittleEndian) {
7609 if (MatchLittleEndian) {
7610 for (unsigned i = 0; i != NumStores; ++i)
7611 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7612 return false;
7613 } else { // MatchBigEndian by reversing loop counter.
7614 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7615 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7616 return false;
7618 return true;
7621 // Check if the offsets line up for the native data layout of this target.
7622 bool NeedBswap = false;
7623 bool NeedRotate = false;
7624 if (!checkOffsets(Layout.isLittleEndian())) {
7625 // Special-case: check if byte offsets line up for the opposite endian.
7626 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7627 NeedBswap = true;
7628 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7629 NeedRotate = true;
7630 else
7631 return SDValue();
7634 SDLoc DL(N);
7635 if (WideVT != SourceValue.getValueType()) {
7636 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7637 "Unexpected store value to merge");
7638 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7641 // Before legalization we can introduce illegal bswaps/rotates which will later
7642 // be converted to an explicit bswap sequence.
7643 // store and byte shuffling instead of several stores and byte shuffling.
7644 if (NeedBswap) {
7645 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7646 } else if (NeedRotate) {
7647 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7648 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7649 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7652 SDValue NewStore =
7653 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7654 FirstStore->getPointerInfo(), FirstStore->getAlign());
7656 // Rely on other DAG combine rules to remove the other individual stores.
7657 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7658 return NewStore;
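// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar equivalence behind the merge above. If the narrow byte stores
// land in the opposite order from the host layout, one wide store of
// BSWAP(Val) writes the same bytes, which is the NeedBswap case.
LLVM_ATTRIBUTE_UNUSED static bool checkMergedTruncStoresSketch(uint32_t Val) {
  // Bytes produced by four truncating stores in big-endian order:
  // offset I receives byte (3 - I) of Val.
  uint8_t Narrow[4];
  for (unsigned I = 0; I != 4; ++I)
    Narrow[I] = uint8_t(Val >> (8 * (3 - I)));
  // Bytes a single little-endian i32 store of BSWAP(Val) would write.
  uint32_t Swapped = (Val >> 24) | ((Val >> 8) & 0xFF00) |
                     ((Val << 8) & 0xFF0000) | (Val << 24);
  for (unsigned I = 0; I != 4; ++I)
    if (Narrow[I] != uint8_t(Swapped >> (8 * I)))
      return false;
  return true;
}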
7661 /// Match a pattern where a wide type scalar value is loaded by several narrow
7662 /// loads and combined by shifts and ors. Fold it into a single load or a load
7663 /// and a BSWAP if the target supports it.
7665 /// Assuming little endian target:
7666 /// i8 *a = ...
7667 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7668 /// =>
7669 /// i32 val = *((i32)a)
7671 /// i8 *a = ...
7672 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7673 /// =>
7674 /// i32 val = BSWAP(*((i32)a))
7676 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7677 /// interact well with the worklist mechanism. When a part of the pattern is
7678 /// updated (e.g. one of the loads), its direct users are put into the worklist,
7679 /// but the root node of the pattern which triggers the load combine is not
7680 /// necessarily a direct user of the changed node. For example, once the address
7681 /// of the t28 load is reassociated, load combine won't be triggered:
7682 /// t25: i32 = add t4, Constant:i32<2>
7683 /// t26: i64 = sign_extend t25
7684 /// t27: i64 = add t2, t26
7685 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7686 /// t29: i32 = zero_extend t28
7687 /// t32: i32 = shl t29, Constant:i8<8>
7688 /// t33: i32 = or t23, t32
7689 /// As a possible fix visitLoad can check if the load can be a part of a load
7690 /// combine pattern and add corresponding OR roots to the worklist.
7691 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7692 assert(N->getOpcode() == ISD::OR &&
7693 "Can only match load combining against OR nodes");
7695 // Handles simple types only
7696 EVT VT = N->getValueType(0);
7697 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7698 return SDValue();
7699 unsigned ByteWidth = VT.getSizeInBits() / 8;
7701 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7702 auto MemoryByteOffset = [&] (ByteProvider P) {
7703 assert(P.isMemory() && "Must be a memory byte provider");
7704 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7705 assert(LoadBitWidth % 8 == 0 &&
7706 "can only analyze providers for individual bytes not bit");
7707 unsigned LoadByteWidth = LoadBitWidth / 8;
7708 return IsBigEndianTarget
7709 ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7710 : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7713 Optional<BaseIndexOffset> Base;
7714 SDValue Chain;
7716 SmallPtrSet<LoadSDNode *, 8> Loads;
7717 Optional<ByteProvider> FirstByteProvider;
7718 int64_t FirstOffset = INT64_MAX;
7720 // Check if all the bytes of the OR we are looking at are loaded from the same
7721 // base address. Collect bytes offsets from Base address in ByteOffsets.
7722 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7723 unsigned ZeroExtendedBytes = 0;
7724 for (int i = ByteWidth - 1; i >= 0; --i) {
7725 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7726 if (!P)
7727 return SDValue();
7729 if (P->isConstantZero()) {
7730 // It's OK for the N most significant bytes to be 0; we can just
7731 // zero-extend the load.
7732 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7733 return SDValue();
7734 continue;
7736 assert(P->isMemory() && "provenance should either be memory or zero");
7738 LoadSDNode *L = P->Load;
7739 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7740 !L->isIndexed() &&
7741 "Must be enforced by calculateByteProvider");
7742 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7744 // All loads must share the same chain
7745 SDValue LChain = L->getChain();
7746 if (!Chain)
7747 Chain = LChain;
7748 else if (Chain != LChain)
7749 return SDValue();
7751 // Loads must share the same base address
7752 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7753 int64_t ByteOffsetFromBase = 0;
7754 if (!Base)
7755 Base = Ptr;
7756 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7757 return SDValue();
7759 // Calculate the offset of the current byte from the base address
7760 ByteOffsetFromBase += MemoryByteOffset(*P);
7761 ByteOffsets[i] = ByteOffsetFromBase;
7763 // Remember the first byte load
7764 if (ByteOffsetFromBase < FirstOffset) {
7765 FirstByteProvider = P;
7766 FirstOffset = ByteOffsetFromBase;
7769 Loads.insert(L);
7771 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7772 "memory, so there must be at least one load which produces the value");
7773 assert(Base && "Base address of the accessed memory location must be set");
7774 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7776 bool NeedsZext = ZeroExtendedBytes > 0;
7778 EVT MemVT =
7779 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7781 if (!MemVT.isSimple())
7782 return SDValue();
7784 // Before legalize we can introduce too wide illegal loads which will be later
7785 // split into legal sized loads. This enables us to combine i64 load by i8
7786 // patterns to a couple of i32 loads on 32-bit targets.
7787 if (LegalOperations &&
7788 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7789 MemVT))
7790 return SDValue();
7792 // Check if the bytes of the OR we are looking at match with either big or
7793 // little endian value load
7794 Optional<bool> IsBigEndian = isBigEndian(
7795 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7796 if (!IsBigEndian.hasValue())
7797 return SDValue();
7799 assert(FirstByteProvider && "must be set");
7801 // Ensure that the first byte is loaded from zero offset of the first load.
7802 // So the combined value can be loaded from the first load address.
7803 if (MemoryByteOffset(*FirstByteProvider) != 0)
7804 return SDValue();
7805 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7807 // The node we are looking at matches with the pattern, check if we can
7808 // replace it with a single (possibly zero-extended) load and bswap + shift if
7809 // needed.
7811 // If the load needs byte swap check if the target supports it
7812 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7814 // Before legalize we can introduce illegal bswaps which will be later
7815 // converted to an explicit bswap sequence. This way we end up with a single
7816 // load and byte shuffling instead of several loads and byte shuffling.
7817 // We do not introduce illegal bswaps when zero-extending as this tends to
7818 // introduce too many arithmetic instructions.
7819 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7820 !TLI.isOperationLegal(ISD::BSWAP, VT))
7821 return SDValue();
7823 // If we need to bswap and zero extend, we have to insert a shift. Check that
7824 // it is legal.
7825 if (NeedsBswap && NeedsZext && LegalOperations &&
7826 !TLI.isOperationLegal(ISD::SHL, VT))
7827 return SDValue();
7829 // Check that a load of the wide type is both allowed and fast on the target
7830 bool Fast = false;
7831 bool Allowed =
7832 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7833 *FirstLoad->getMemOperand(), &Fast);
7834 if (!Allowed || !Fast)
7835 return SDValue();
7837 SDValue NewLoad =
7838 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7839 Chain, FirstLoad->getBasePtr(),
7840 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7842 // Transfer chain users from old loads to the new load.
7843 for (LoadSDNode *L : Loads)
7844 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7846 if (!NeedsBswap)
7847 return NewLoad;
7849 SDValue ShiftedLoad =
7850 NeedsZext
7851 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7852 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7853 SDLoc(N), LegalOperations))
7854 : NewLoad;
7855 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
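// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the byte arithmetic MatchLoadCombine relies on. Given the bytes a
// little-endian i32 store of Val leaves in memory, OR-ing them back together
// at increasing shift amounts reproduces Val (a plain wide load), and the
// reversed order reproduces BSWAP(Val) (a wide load plus byte swap).
LLVM_ATTRIBUTE_UNUSED static bool checkLoadCombineSketch(uint32_t Val) {
  uint8_t A[4];
  for (unsigned I = 0; I != 4; ++I)
    A[I] = uint8_t(Val >> (8 * I));
  uint32_t LittleEndian = uint32_t(A[0]) | (uint32_t(A[1]) << 8) |
                          (uint32_t(A[2]) << 16) | (uint32_t(A[3]) << 24);
  uint32_t BigEndian = (uint32_t(A[0]) << 24) | (uint32_t(A[1]) << 16) |
                       (uint32_t(A[2]) << 8) | uint32_t(A[3]);
  uint32_t Swapped = (Val >> 24) | ((Val >> 8) & 0xFF00) |
                     ((Val << 8) & 0xFF0000) | (Val << 24);
  return LittleEndian == Val && BigEndian == Swapped;
}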
7858 // If the target has andn, bsl, or a similar bit-select instruction,
7859 // we want to unfold masked merge, with canonical pattern of:
7860 // | A | |B|
7861 // ((x ^ y) & m) ^ y
7862 // | D |
7863 // Into:
7864 // (x & m) | (y & ~m)
7865 // If y is a constant, m is not a 'not', and the 'andn' does not work with
7866 // immediates, we unfold into a different pattern:
7867 // ~(~x & m) & (m | y)
7868 // If x is a constant, m is a 'not', and the 'andn' does not work with
7869 // immediates, we unfold into a different pattern:
7870 // (x | ~m) & ~(~m & ~y)
7871 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7872 // the very least that breaks andnpd / andnps patterns, and because those
7873 // patterns are simplified in IR and shouldn't be created in the DAG
7874 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7875 assert(N->getOpcode() == ISD::XOR);
7877 // Don't touch 'not' (i.e. where y = -1).
7878 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7879 return SDValue();
7881 EVT VT = N->getValueType(0);
7883 // There are 3 commutable operators in the pattern,
7884 // so we have to deal with 8 possible variants of the basic pattern.
7885 SDValue X, Y, M;
7886 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7887 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7888 return false;
7889 SDValue Xor = And.getOperand(XorIdx);
7890 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7891 return false;
7892 SDValue Xor0 = Xor.getOperand(0);
7893 SDValue Xor1 = Xor.getOperand(1);
7894 // Don't touch 'not' (i.e. where y = -1).
7895 if (isAllOnesOrAllOnesSplat(Xor1))
7896 return false;
7897 if (Other == Xor0)
7898 std::swap(Xor0, Xor1);
7899 if (Other != Xor1)
7900 return false;
7901 X = Xor0;
7902 Y = Xor1;
7903 M = And.getOperand(XorIdx ? 0 : 1);
7904 return true;
7907 SDValue N0 = N->getOperand(0);
7908 SDValue N1 = N->getOperand(1);
7909 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7910 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7911 return SDValue();
7913 // Don't do anything if the mask is constant. This should not be reachable.
7914 // InstCombine should have already unfolded this pattern, and DAGCombiner
7915 // probably shouldn't produce it either.
7916 if (isa<ConstantSDNode>(M.getNode()))
7917 return SDValue();
7919 // We can transform if the target has AndNot
7920 if (!TLI.hasAndNot(M))
7921 return SDValue();
7923 SDLoc DL(N);
7925 // If Y is a constant, check that 'andn' works with immediates. Unless M is
7926 // a bitwise not that would already allow ANDN to be used.
7927 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
7928 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7929 // If not, we need to do a bit more work to make sure andn is still used.
7930 SDValue NotX = DAG.getNOT(DL, X, VT);
7931 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7932 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7933 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7934 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7937 // If X is a constant and M is a bitwise not, check that 'andn' works with
7938 // immediates.
7939 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
7940 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
7941 // If not, we need to do a bit more work to make sure andn is still used.
7942 SDValue NotM = M.getOperand(0);
7943 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
7944 SDValue NotY = DAG.getNOT(DL, Y, VT);
7945 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
7946 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
7947 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
7950 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7951 SDValue NotM = DAG.getNOT(DL, M, VT);
7952 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7954 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
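// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the bit-level identities behind unfoldMaskedMerge, checked on 32-bit
// scalars. Each result bit takes X where the mask bit is set and Y where it
// is clear, in all three unfolded forms used above.
LLVM_ATTRIBUTE_UNUSED static bool
checkMaskedMergeSketch(uint32_t X, uint32_t Y, uint32_t M) {
  uint32_t Folded = ((X ^ Y) & M) ^ Y;    // canonical masked merge
  uint32_t Unfolded = (X & M) | (Y & ~M); // preferred form when ANDN exists
  uint32_t ConstYForm = ~(~X & M) & (M | Y);
  uint32_t ConstXForm = (X | ~M) & ~(~M & ~Y);
  return Folded == Unfolded && Folded == ConstYForm && Folded == ConstXForm;
}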
7957 SDValue DAGCombiner::visitXOR(SDNode *N) {
7958 SDValue N0 = N->getOperand(0);
7959 SDValue N1 = N->getOperand(1);
7960 EVT VT = N0.getValueType();
7961 SDLoc DL(N);
7963 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7964 if (N0.isUndef() && N1.isUndef())
7965 return DAG.getConstant(0, DL, VT);
7967 // fold (xor x, undef) -> undef
7968 if (N0.isUndef())
7969 return N0;
7970 if (N1.isUndef())
7971 return N1;
7973 // fold (xor c1, c2) -> c1^c2
7974 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7975 return C;
7977 // canonicalize constant to RHS
7978 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7979 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7980 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7982 // fold vector ops
7983 if (VT.isVector()) {
7984 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7985 return FoldedVOp;
7987 // fold (xor x, 0) -> x, vector edition
7988 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7989 return N0;
7992 // fold (xor x, 0) -> x
7993 if (isNullConstant(N1))
7994 return N0;
7996 if (SDValue NewSel = foldBinOpIntoSelect(N))
7997 return NewSel;
7999 // reassociate xor
8000 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8001 return RXOR;
8003 // fold !(x cc y) -> (x !cc y)
8004 unsigned N0Opcode = N0.getOpcode();
8005 SDValue LHS, RHS, CC;
8006 if (TLI.isConstTrueVal(N1.getNode()) &&
8007 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
8008 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8009 LHS.getValueType());
8010 if (!LegalOperations ||
8011 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8012 switch (N0Opcode) {
8013 default:
8014 llvm_unreachable("Unhandled SetCC Equivalent!");
8015 case ISD::SETCC:
8016 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8017 case ISD::SELECT_CC:
8018 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8019 N0.getOperand(3), NotCC);
8020 case ISD::STRICT_FSETCC:
8021 case ISD::STRICT_FSETCCS: {
8022 if (N0.hasOneUse()) {
8023 // FIXME Can we handle multiple uses? Could we token factor the chain
8024 // results from the new/old setcc?
8025 SDValue SetCC =
8026 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8027 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8028 CombineTo(N, SetCC);
8029 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8030 recursivelyDeleteUnusedNodes(N0.getNode());
8031 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8033 break;
8039 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8040 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8041 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8042 SDValue V = N0.getOperand(0);
8043 SDLoc DL0(N0);
8044 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8045 DAG.getConstant(1, DL0, V.getValueType()));
8046 AddToWorklist(V.getNode());
8047 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8050 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8051 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8052 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8053 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8054 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8055 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8056 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8057 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8058 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8059 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8062 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8063 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8064 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8065 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8066 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8067 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8068 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8069 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8070 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8071 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8075 // fold (not (neg x)) -> (add X, -1)
8076 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8077 // Y is a constant or the subtract has a single use.
8078 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8079 isNullConstant(N0.getOperand(0))) {
8080 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8081 DAG.getAllOnesConstant(DL, VT));
8084 // fold (not (add X, -1)) -> (neg X)
8085 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8086 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8087 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8088 N0.getOperand(0));
8091 // fold (xor (and x, y), y) -> (and (not x), y)
8092 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8093 SDValue X = N0.getOperand(0);
8094 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8095 AddToWorklist(NotX.getNode());
8096 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8099 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8100 ConstantSDNode *XorC = isConstOrConstSplat(N1);
8101 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8102 unsigned BitWidth = VT.getScalarSizeInBits();
8103 if (XorC && ShiftC) {
8104 // Don't crash on an oversized shift. We cannot guarantee that a bogus
8105 // shift has been simplified to undef.
8106 uint64_t ShiftAmt = ShiftC->getLimitedValue();
8107 if (ShiftAmt < BitWidth) {
8108 APInt Ones = APInt::getAllOnes(BitWidth);
8109 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8110 if (XorC->getAPIntValue() == Ones) {
8111 // If the xor constant is a shifted -1, do a 'not' before the shift:
8112 // xor (X << ShiftC), XorC --> (not X) << ShiftC
8113 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8114 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8115 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8121 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8122 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8123 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8124 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8125 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8126 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8127 SDValue S0 = S.getOperand(0);
8128 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8129 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8130 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8131 return DAG.getNode(ISD::ABS, DL, VT, S0);
8135 // fold (xor x, x) -> 0
8136 if (N0 == N1)
8137 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8139 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8140 // Here is a concrete example of this equivalence:
8141 // i16 x == 14
8142 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
8143 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8145 // =>
8147 // i16 ~1 == 0b1111111111111110
8148 // i16 rol(~1, 14) == 0b1011111111111111
8150 // Some additional tips to help conceptualize this transform:
8151 // - Try to see the operation as placing a single zero in a value of all ones.
8152 // - There exists no value for x which would allow the result to contain zero.
8153 // - Values of x larger than the bitwidth are undefined and do not require a
8154 // consistent result.
8155 // - Pushing the zero left requires shifting one-bits in from the right.
8156 // A rotate left of ~1 is a nice way of achieving the desired result.
8157 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8158 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8159 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8160 N0.getOperand(1));
8163 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
8164 if (N0Opcode == N1.getOpcode())
8165 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8166 return V;
8168 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
8169 if (SDValue MM = unfoldMaskedMerge(N))
8170 return MM;
8172 // Simplify the expression using non-local knowledge.
8173 if (SimplifyDemandedBits(SDValue(N, 0)))
8174 return SDValue(N, 0);
8176 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
8177 return Combined;
8179 return SDValue();
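// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// two of the XOR folds above on concrete 32-bit values. ~(1 << S) equals a
// left-rotate of ~1 by S, and xor (add (X, Y), Y) with Y = (sra X, 31)
// computes the absolute value of X.
LLVM_ATTRIBUTE_UNUSED static bool checkXorFoldSketch(int32_t X, unsigned S) {
  S &= 31; // keep the rotate amount in range
  uint32_t Not1 = ~uint32_t(1);
  uint32_t NotShl = ~(uint32_t(1) << S);
  uint32_t RotNot1 = S == 0 ? Not1 : (Not1 << S) | (Not1 >> (32 - S));
  uint32_t SignMask = X < 0 ? ~uint32_t(0) : 0; // what (sra X, 31) produces
  uint32_t Abs = (uint32_t(X) + SignMask) ^ SignMask;
  uint32_t Expected = X < 0 ? uint32_t(-int64_t(X)) : uint32_t(X);
  return NotShl == RotNot1 && Abs == Expected;
}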
8182 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8183 /// shift-by-constant operand with identical opcode, we may be able to convert
8184 /// that into 2 independent shifts followed by the logic op. This is a
8185 /// throughput improvement.
8186 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8187 // Match a one-use bitwise logic op.
8188 SDValue LogicOp = Shift->getOperand(0);
8189 if (!LogicOp.hasOneUse())
8190 return SDValue();
8192 unsigned LogicOpcode = LogicOp.getOpcode();
8193 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8194 LogicOpcode != ISD::XOR)
8195 return SDValue();
8197 // Find a matching one-use shift by constant.
8198 unsigned ShiftOpcode = Shift->getOpcode();
8199 SDValue C1 = Shift->getOperand(1);
8200 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8201 assert(C1Node && "Expected a shift with constant operand");
8202 const APInt &C1Val = C1Node->getAPIntValue();
8203 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8204 const APInt *&ShiftAmtVal) {
8205 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8206 return false;
8208 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8209 if (!ShiftCNode)
8210 return false;
8212 // Capture the shifted operand and shift amount value.
8213 ShiftOp = V.getOperand(0);
8214 ShiftAmtVal = &ShiftCNode->getAPIntValue();
8216 // Shift amount types do not have to match their operand type, so check that
8217 // the constants are the same width.
8218 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8219 return false;
8221 // The fold is not valid if the sum of the shift values exceeds bitwidth.
8222 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8223 return false;
8225 return true;
8228 // Logic ops are commutative, so check each operand for a match.
8229 SDValue X, Y;
8230 const APInt *C0Val;
8231 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8232 Y = LogicOp.getOperand(1);
8233 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8234 Y = LogicOp.getOperand(0);
8235 else
8236 return SDValue();
8238 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8239 SDLoc DL(Shift);
8240 EVT VT = Shift->getValueType(0);
8241 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8242 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8243 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8244 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8245 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
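// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar form of the rewrite above for SHL of an AND. Shifting
// distributes over the bitwise logic op, so the two shifts of X merge into
// one as long as the amounts do not sum past the bit width.
LLVM_ATTRIBUTE_UNUSED static bool
checkShiftOfShiftedLogicSketch(uint32_t X, uint32_t Y, unsigned C0,
                               unsigned C1) {
  if (C0 + C1 >= 32)
    return true; // the fold above gives up on oversized sums
  uint32_t Before = ((X << C0) & Y) << C1;
  uint32_t After = (X << (C0 + C1)) & (Y << C1);
  return Before == After;
}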
8248 /// Handle transforms common to the three shifts, when the shift amount is a
8249 /// constant.
8250 /// We are looking for: (shift being one of shl/sra/srl)
8251 /// shift (binop X, C0), C1
8252 /// And want to transform into:
8253 /// binop (shift X, C1), (shift C0, C1)
8254 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8255 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8257 // Do not turn a 'not' into a regular xor.
8258 if (isBitwiseNot(N->getOperand(0)))
8259 return SDValue();
8261 // The inner binop must be one-use, since we want to replace it.
8262 SDValue LHS = N->getOperand(0);
8263 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8264 return SDValue();
8266 // TODO: This is limited to early combining because it may reveal regressions
8267 // otherwise. But since we just checked a target hook to see if this is
8268 // desirable, that should have filtered out cases where this interferes
8269 // with some other pattern matching.
8270 if (!LegalTypes)
8271 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8272 return R;
8274 // We want to pull some binops through shifts, so that we have (and (shift))
8275 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
8276 // thing happens with address calculations, so it's important to canonicalize
8277 // it.
8278 switch (LHS.getOpcode()) {
8279 default:
8280 return SDValue();
8281 case ISD::OR:
8282 case ISD::XOR:
8283 case ISD::AND:
8284 break;
8285 case ISD::ADD:
8286 if (N->getOpcode() != ISD::SHL)
8287 return SDValue(); // only shl(add) not sr[al](add).
8288 break;
8291 // We require the RHS of the binop to be a constant and not opaque as well.
8292 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8293 if (!BinOpCst)
8294 return SDValue();
8296 // FIXME: disable this unless the input to the binop is a shift by a constant
8297 // or is a copy/select. Enable this in other cases once we can tell exactly when
8298 // it is profitable.
8299 SDValue BinOpLHSVal = LHS.getOperand(0);
8300 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8301 BinOpLHSVal.getOpcode() == ISD::SRA ||
8302 BinOpLHSVal.getOpcode() == ISD::SRL) &&
8303 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8304 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8305 BinOpLHSVal.getOpcode() == ISD::SELECT;
8307 if (!IsShiftByConstant && !IsCopyOrSelect)
8308 return SDValue();
8310 if (IsCopyOrSelect && N->hasOneUse())
8311 return SDValue();
8313 // Fold the constants, shifting the binop RHS by the shift amount.
8314 SDLoc DL(N);
8315 EVT VT = N->getValueType(0);
8316 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8317 N->getOperand(1));
8318 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8320 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8321 N->getOperand(1));
8322 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
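// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar equivalence used above. For SHL, both the logic ops and ADD
// commute with the shift once the constant operand is pre-shifted; for
// SRL/SRA only the logic ops do, which is why ADD is restricted to SHL above.
LLVM_ATTRIBUTE_UNUSED static bool checkShiftOfBinopSketch(uint32_t X,
                                                          uint32_t C0,
                                                          unsigned C1) {
  C1 &= 31;
  bool AndOk = ((X & C0) << C1) == ((X << C1) & (C0 << C1));
  bool AddOk = ((X + C0) << C1) == ((X << C1) + (C0 << C1));
  return AndOk && AddOk;
}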
8325 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8326 assert(N->getOpcode() == ISD::TRUNCATE);
8327 assert(N->getOperand(0).getOpcode() == ISD::AND);
8329 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8330 EVT TruncVT = N->getValueType(0);
8331 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8332 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8333 SDValue N01 = N->getOperand(0).getOperand(1);
8334 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8335 SDLoc DL(N);
8336 SDValue N00 = N->getOperand(0).getOperand(0);
8337 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8338 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8339 AddToWorklist(Trunc00.getNode());
8340 AddToWorklist(Trunc01.getNode());
8341 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8345 return SDValue();
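// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// truncation distributes over AND, here shown for a 64-bit value narrowed to
// 32 bits, which is the scalar fact the rewrite above relies on.
LLVM_ATTRIBUTE_UNUSED static bool checkTruncOfAndSketch(uint64_t X,
                                                        uint64_t C) {
  return uint32_t(X & C) == (uint32_t(X) & uint32_t(C));
}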
8348 SDValue DAGCombiner::visitRotate(SDNode *N) {
8349 SDLoc dl(N);
8350 SDValue N0 = N->getOperand(0);
8351 SDValue N1 = N->getOperand(1);
8352 EVT VT = N->getValueType(0);
8353 unsigned Bitsize = VT.getScalarSizeInBits();
8355 // fold (rot x, 0) -> x
8356 if (isNullOrNullSplat(N1))
8357 return N0;
8359 // fold (rot x, c) -> x iff (c % BitSize) == 0
8360 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8361 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8362 if (DAG.MaskedValueIsZero(N1, ModuloMask))
8363 return N0;
8366 // fold (rot x, c) -> (rot x, c % BitSize)
8367 bool OutOfRange = false;
8368 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8369 OutOfRange |= C->getAPIntValue().uge(Bitsize);
8370 return true;
8372 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8373 EVT AmtVT = N1.getValueType();
8374 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8375 if (SDValue Amt =
8376 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8377 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8380 // rot i16 X, 8 --> bswap X
8381 auto *RotAmtC = isConstOrConstSplat(N1);
8382 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8383 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8384 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8386 // Simplify the operands using demanded-bits information.
8387 if (SimplifyDemandedBits(SDValue(N, 0)))
8388 return SDValue(N, 0);
8390 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8391 if (N1.getOpcode() == ISD::TRUNCATE &&
8392 N1.getOperand(0).getOpcode() == ISD::AND) {
8393 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8394 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8397 unsigned NextOp = N0.getOpcode();
8398 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8399 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8400 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8401 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8402 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8403 EVT ShiftVT = C1->getValueType(0);
8404 bool SameSide = (N->getOpcode() == NextOp);
8405 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8406 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8407 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8408 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8409 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8410 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8411 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8412 CombinedShiftNorm);
8416 return SDValue();
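// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// rotate amounts compose modulo the bit width, and a 16-bit rotate by 8 is a
// byte swap, matching the visitRotate folds above.
LLVM_ATTRIBUTE_UNUSED static uint16_t rotl16Sketch(uint16_t X, unsigned R) {
  R &= 15;
  return R == 0 ? X : uint16_t((X << R) | (X >> (16 - R)));
}
LLVM_ATTRIBUTE_UNUSED static bool
checkRotateFoldSketch(uint16_t X, unsigned C1, unsigned C2) {
  bool ComposeOk = rotl16Sketch(rotl16Sketch(X, C2), C1) ==
                   rotl16Sketch(X, (C1 + C2) % 16);
  bool BswapOk = rotl16Sketch(X, 8) == uint16_t((X << 8) | (X >> 8));
  return ComposeOk && BswapOk;
}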
8419 SDValue DAGCombiner::visitSHL(SDNode *N) {
8420 SDValue N0 = N->getOperand(0);
8421 SDValue N1 = N->getOperand(1);
8422 if (SDValue V = DAG.simplifyShift(N0, N1))
8423 return V;
8425 EVT VT = N0.getValueType();
8426 EVT ShiftVT = N1.getValueType();
8427 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8429 // fold (shl c1, c2) -> c1<<c2
8430 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8431 return C;
8433 // fold vector ops
8434 if (VT.isVector()) {
8435 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8436 return FoldedVOp;
8438 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8439 // If setcc produces all-one true value then:
8440 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8441 if (N1CV && N1CV->isConstant()) {
8442 if (N0.getOpcode() == ISD::AND) {
8443 SDValue N00 = N0->getOperand(0);
8444 SDValue N01 = N0->getOperand(1);
8445 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8447 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8448 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8449 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8450 if (SDValue C =
8451 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8452 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8458 if (SDValue NewSel = foldBinOpIntoSelect(N))
8459 return NewSel;
8461 // if (shl x, c) is known to be zero, return 0
8462 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8463 return DAG.getConstant(0, SDLoc(N), VT);
8465 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8466 if (N1.getOpcode() == ISD::TRUNCATE &&
8467 N1.getOperand(0).getOpcode() == ISD::AND) {
8468 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8469 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8472 if (SimplifyDemandedBits(SDValue(N, 0)))
8473 return SDValue(N, 0);
8475 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8476 if (N0.getOpcode() == ISD::SHL) {
8477 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8478 ConstantSDNode *RHS) {
8479 APInt c1 = LHS->getAPIntValue();
8480 APInt c2 = RHS->getAPIntValue();
8481 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8482 return (c1 + c2).uge(OpSizeInBits);
8484 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8485 return DAG.getConstant(0, SDLoc(N), VT);
8487 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8488 ConstantSDNode *RHS) {
8489 APInt c1 = LHS->getAPIntValue();
8490 APInt c2 = RHS->getAPIntValue();
8491 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8492 return (c1 + c2).ult(OpSizeInBits);
8494 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8495 SDLoc DL(N);
8496 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8497 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8501 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8502 // For this to be valid, the second form must not preserve any of the bits
8503 // that are shifted out by the inner shift in the first form. This means
8504 // the outer shift size must be >= the number of bits added by the ext.
8505 // As a corollary, we don't care what kind of ext it is.
8506 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8507 N0.getOpcode() == ISD::ANY_EXTEND ||
8508 N0.getOpcode() == ISD::SIGN_EXTEND) &&
8509 N0.getOperand(0).getOpcode() == ISD::SHL) {
8510 SDValue N0Op0 = N0.getOperand(0);
8511 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8512 EVT InnerVT = N0Op0.getValueType();
8513 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8515 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8516 ConstantSDNode *RHS) {
8517 APInt c1 = LHS->getAPIntValue();
8518 APInt c2 = RHS->getAPIntValue();
8519 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8520 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8521 (c1 + c2).uge(OpSizeInBits);
8523 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8524 /*AllowUndefs*/ false,
8525 /*AllowTypeMismatch*/ true))
8526 return DAG.getConstant(0, SDLoc(N), VT);
8528 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8529 ConstantSDNode *RHS) {
8530 APInt c1 = LHS->getAPIntValue();
8531 APInt c2 = RHS->getAPIntValue();
8532 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8533 return c2.uge(OpSizeInBits - InnerBitwidth) &&
8534 (c1 + c2).ult(OpSizeInBits);
8536 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8537 /*AllowUndefs*/ false,
8538 /*AllowTypeMismatch*/ true)) {
8539 SDLoc DL(N);
8540 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8541 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8542 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8543 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8547 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8548 // Only fold this if the inner zext has no other uses to avoid increasing
8549 // the total number of instructions.
8550 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8551 N0.getOperand(0).getOpcode() == ISD::SRL) {
8552 SDValue N0Op0 = N0.getOperand(0);
8553 SDValue InnerShiftAmt = N0Op0.getOperand(1);
8555 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8556 APInt c1 = LHS->getAPIntValue();
8557 APInt c2 = RHS->getAPIntValue();
8558 zeroExtendToMatch(c1, c2);
8559 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8561 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8562 /*AllowUndefs*/ false,
8563 /*AllowTypeMismatch*/ true)) {
8564 SDLoc DL(N);
8565 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8566 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8567 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8568 AddToWorklist(NewSHL.getNode());
8569 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8573 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
8574 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
8575 // TODO - support non-uniform vector shift amounts.
8576 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8577 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8578 N0->getFlags().hasExact()) {
8579 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8580 uint64_t C1 = N0C1->getZExtValue();
8581 uint64_t C2 = N1C->getZExtValue();
8582 SDLoc DL(N);
8583 if (C1 <= C2)
8584 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8585 DAG.getConstant(C2 - C1, DL, ShiftVT));
8586 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8587 DAG.getConstant(C1 - C2, DL, ShiftVT));
8591 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8592 //                               (and (srl x, (sub c1, c2)), MASK)
8593 // Only fold this if the inner shift has no other uses -- if it does, folding
8594 // this will increase the total number of instructions.
8595 // TODO - drop hasOneUse requirement if c1 == c2?
8596 // TODO - support non-uniform vector shift amounts.
8597 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8598 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8599 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8600 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8601 uint64_t c1 = N0C1->getZExtValue();
8602 uint64_t c2 = N1C->getZExtValue();
8603 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8604 SDValue Shift;
8605 if (c2 > c1) {
8606 Mask <<= c2 - c1;
8607 SDLoc DL(N);
8608 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8609 DAG.getConstant(c2 - c1, DL, ShiftVT));
8610 } else {
8611 Mask.lshrInPlace(c1 - c2);
8612 SDLoc DL(N);
8613 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8614 DAG.getConstant(c1 - c2, DL, ShiftVT));
8616 SDLoc DL(N0);
8617 return DAG.getNode(ISD::AND, DL, VT, Shift,
8618 DAG.getConstant(Mask, DL, VT));
8623 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8624 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8625 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8626 SDLoc DL(N);
8627 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8628 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8629 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8632 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8633 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8634 // Variant of the version done on multiply, except that a mul by a power of 2
8635 // is turned into a shift.
8636 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8637 N0.getNode()->hasOneUse() &&
8638 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8639 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8640 TLI.isDesirableToCommuteWithShift(N, Level)) {
8641 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8642 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8643 AddToWorklist(Shl0.getNode());
8644 AddToWorklist(Shl1.getNode());
8645 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8648 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8649 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8650 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8651 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8652 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8653 if (isConstantOrConstantVector(Shl))
8654 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8657 if (N1C && !N1C->isOpaque())
8658 if (SDValue NewSHL = visitShiftByConstant(N))
8659 return NewSHL;
8661 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8662 if (N0.getOpcode() == ISD::VSCALE)
8663 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8664 const APInt &C0 = N0.getConstantOperandAPInt(0);
8665 const APInt &C1 = NC1->getAPIntValue();
8666 return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8669 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8670 APInt ShlVal;
8671 if (N0.getOpcode() == ISD::STEP_VECTOR)
8672 if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8673 const APInt &C0 = N0.getConstantOperandAPInt(0);
8674 if (ShlVal.ult(C0.getBitWidth())) {
8675 APInt NewStep = C0 << ShlVal;
8676 return DAG.getStepVector(SDLoc(N), VT, NewStep);
8680 return SDValue();
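// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// two of the SHL folds above on 32-bit scalars. Stacked left shifts sum
// their amounts (or collapse to zero once the sum reaches the bit width),
// and (shl (srl X, C), C) merely clears the low C bits.
LLVM_ATTRIBUTE_UNUSED static bool checkShlFoldSketch(uint32_t X, unsigned C1,
                                                     unsigned C2) {
  C1 &= 31;
  C2 &= 31;
  uint32_t Stacked = (X << C1) << C2;
  uint32_t Folded = C1 + C2 >= 32 ? 0 : X << (C1 + C2);
  uint32_t MaskForm = (X >> C1) << C1; // == X & (-1 << C1)
  return Stacked == Folded && MaskForm == (X & (~uint32_t(0) << C1));
}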
8683 // Transform a right shift of a multiply into a multiply-high.
8684 // Examples:
8685 // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
8686 // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
8687 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8688 const TargetLowering &TLI) {
8689 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8690 "SRL or SRA node is required here!");
8692 // Check the shift amount. Proceed with the transformation if the shift
8693 // amount is constant.
8694 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8695 if (!ShiftAmtSrc)
8696 return SDValue();
8698 SDLoc DL(N);
8700 // The operation feeding into the shift must be a multiply.
8701 SDValue ShiftOperand = N->getOperand(0);
8702 if (ShiftOperand.getOpcode() != ISD::MUL)
8703 return SDValue();
8705 // Both operands must be equivalent extend nodes.
8706 SDValue LeftOp = ShiftOperand.getOperand(0);
8707 SDValue RightOp = ShiftOperand.getOperand(1);
8709 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8710 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8712 if (!IsSignExt && !IsZeroExt)
8713 return SDValue();
8715 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8716 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8718 SDValue MulhRightOp;
8719 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
8720 unsigned ActiveBits = IsSignExt
8721 ? Constant->getAPIntValue().getMinSignedBits()
8722 : Constant->getAPIntValue().getActiveBits();
8723 if (ActiveBits > NarrowVTSize)
8724 return SDValue();
8725 MulhRightOp = DAG.getConstant(
8726 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
8727 NarrowVT);
8728 } else {
8729 if (LeftOp.getOpcode() != RightOp.getOpcode())
8730 return SDValue();
8731 // Check that the two extend nodes are the same type.
8732 if (NarrowVT != RightOp.getOperand(0).getValueType())
8733 return SDValue();
8734 MulhRightOp = RightOp.getOperand(0);
8737 EVT WideVT = LeftOp.getValueType();
8738 // Proceed with the transformation if the wide types match.
8739 assert((WideVT == RightOp.getValueType()) &&
8740 "Cannot have a multiply node with two different operand types.");
8742 // Proceed with the transformation if the wide type is twice as large
8743 // as the narrow type.
8744 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
8745 return SDValue();
8747 // Check the shift amount with the narrow type size.
8748 // Proceed with the transformation if the shift amount is the width
8749 // of the narrow type.
8750 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8751 if (ShiftAmt != NarrowVTSize)
8752 return SDValue();
8754 // If the operation feeding into the MUL is a sign extend (sext),
8755 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8756 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8758 // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8759 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8760 return SDValue();
8762 SDValue Result =
8763 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
8764 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
8765 : DAG.getZExtOrTrunc(Result, DL, WideVT));
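// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// the scalar meaning of the rewrite above for the unsigned case. The high 32
// bits of the widened product are exactly what MULHU of the narrow operands
// returns, and the low 32 bits are the ordinary narrow multiply, so the
// shift-by-32 keeps only the MULHU half.
LLVM_ATTRIBUTE_UNUSED static bool checkMulhSketch(uint32_t A, uint32_t B) {
  uint64_t Wide = uint64_t(A) * uint64_t(B); // (mul (zext A), (zext B))
  uint32_t High = uint32_t(Wide >> 32);      // value of (srl Wide, 32)
  uint32_t Low = uint32_t(Wide);
  return ((uint64_t(High) << 32) | Low) == Wide && Low == A * B;
}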
8768 SDValue DAGCombiner::visitSRA(SDNode *N) {
8769 SDValue N0 = N->getOperand(0);
8770 SDValue N1 = N->getOperand(1);
8771 if (SDValue V = DAG.simplifyShift(N0, N1))
8772 return V;
8774 EVT VT = N0.getValueType();
8775 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8777 // fold (sra c1, c2) -> c1 >>s c2
8778 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8779 return C;
8781 // Arithmetic shifting an all-sign-bit value is a no-op.
8782 // fold (sra 0, x) -> 0
8783 // fold (sra -1, x) -> -1
8784 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8785 return N0;
8787 // fold vector ops
8788 if (VT.isVector())
8789 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8790 return FoldedVOp;
8792 if (SDValue NewSel = foldBinOpIntoSelect(N))
8793 return NewSel;
8795 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8796 // sext_inreg.
8797 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8798 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8799 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8800 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8801 if (VT.isVector())
8802 ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8803 VT.getVectorElementCount());
8804 if (!LegalOperations ||
8805 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8806 TargetLowering::Legal)
8807 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8808 N0.getOperand(0), DAG.getValueType(ExtVT));
8809 // Even if we can't convert to sext_inreg, we might be able to remove
8810 // this shift pair if the input is already sign extended.
8811 if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8812 return N0.getOperand(0);
8815 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8816 // clamp (add c1, c2) to max shift.
8817 if (N0.getOpcode() == ISD::SRA) {
8818 SDLoc DL(N);
8819 EVT ShiftVT = N1.getValueType();
8820 EVT ShiftSVT = ShiftVT.getScalarType();
8821 SmallVector<SDValue, 16> ShiftValues;
8823 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8824 APInt c1 = LHS->getAPIntValue();
8825 APInt c2 = RHS->getAPIntValue();
8826 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8827 APInt Sum = c1 + c2;
8828 unsigned ShiftSum =
8829 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8830 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8831 return true;
8833 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8834 SDValue ShiftValue;
8835 if (N1.getOpcode() == ISD::BUILD_VECTOR)
8836 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8837 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8838 assert(ShiftValues.size() == 1 &&
8839 "Expected matchBinaryPredicate to return one element for "
8840 "SPLAT_VECTORs");
8841 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8842 } else
8843 ShiftValue = ShiftValues[0];
8844 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8848 // fold (sra (shl X, m), (sub result_size, n))
8849 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8850 // result_size - n != m.
8851 // If truncate is free for the target sext(shl) is likely to result in better
8852 // code.
8853 if (N0.getOpcode() == ISD::SHL && N1C) {
8854 // Get the two constants of the shifts, CN0 = m, CN = n.
8855 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8856 if (N01C) {
8857 LLVMContext &Ctx = *DAG.getContext();
8858 // Determine what the truncate's result bitsize and type would be.
8859 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8861 if (VT.isVector())
8862 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8864 // Determine the residual right-shift amount.
8865 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8867 // If the shift is not a no-op (in which case this should be just a sign
8868 // extend already), the truncate-to type is legal, sign_extend is legal
8869 // on that type, and the truncate to that type is both legal and free,
8870 // perform the transform.
8871 if ((ShiftAmt > 0) &&
8872 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8873 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8874 TLI.isTruncateFree(VT, TruncVT)) {
8875 SDLoc DL(N);
8876 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8877 getShiftAmountTy(N0.getOperand(0).getValueType()));
8878 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8879 N0.getOperand(0), Amt);
8880 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8881 Shift);
8882 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8883 N->getValueType(0), Trunc);
8888 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8889 // sra (add (shl X, N1C), AddC), N1C -->
8890 // sext (add (trunc X to (width - N1C)), AddC')
8891 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8892 N0.getOperand(0).getOpcode() == ISD::SHL &&
8893 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8894 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8895 SDValue Shl = N0.getOperand(0);
8896 // Determine what the truncate's type would be and ask the target if that
8897 // is a free operation.
8898 LLVMContext &Ctx = *DAG.getContext();
8899 unsigned ShiftAmt = N1C->getZExtValue();
8900 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8901 if (VT.isVector())
8902 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8904 // TODO: The simple type check probably belongs in the default hook
8905 // implementation and/or target-specific overrides (because
8906 // non-simple types likely require masking when legalized), but that
8907 // restriction may conflict with other transforms.
8908 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8909 TLI.isTruncateFree(VT, TruncVT)) {
8910 SDLoc DL(N);
8911 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8912 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8913 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8914 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8915 return DAG.getSExtOrTrunc(Add, DL, VT);
8920 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8921 if (N1.getOpcode() == ISD::TRUNCATE &&
8922 N1.getOperand(0).getOpcode() == ISD::AND) {
8923 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8924 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8927 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8928 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8929 // if c1 is equal to the number of bits the trunc removes
8930 // TODO - support non-uniform vector shift amounts.
8931 if (N0.getOpcode() == ISD::TRUNCATE &&
8932 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8933 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8934 N0.getOperand(0).hasOneUse() &&
8935 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8936 SDValue N0Op0 = N0.getOperand(0);
8937 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8938 EVT LargeVT = N0Op0.getValueType();
8939 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8940 if (LargeShift->getAPIntValue() == TruncBits) {
8941 SDLoc DL(N);
8942 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8943 getShiftAmountTy(LargeVT));
8944 SDValue SRA =
8945 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8946 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8951 // Simplify, based on bits shifted out of the LHS.
8952 if (SimplifyDemandedBits(SDValue(N, 0)))
8953 return SDValue(N, 0);
8955 // If the sign bit is known to be zero, switch this to a SRL.
8956 if (DAG.SignBitIsZero(N0))
8957 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8959 if (N1C && !N1C->isOpaque())
8960 if (SDValue NewSRA = visitShiftByConstant(N))
8961 return NewSRA;
8963 // Try to transform this shift into a multiply-high if
8964 // it matches the appropriate pattern detected in combineShiftToMULH.
8965 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8966 return MULH;
8968 return SDValue();
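// Illustrative sketch only (not part of the combiner, and not an LLVM API):
// (sra (shl X, C), C) is sign-extension from the low (32 - C) bits, matching
// the sext_inreg fold above. Shown for C == 24, assuming the usual
// two's-complement conversions and an arithmetic signed right shift on the
// host (true for all supported hosts).
LLVM_ATTRIBUTE_UNUSED static bool checkSraOfShlSketch(uint32_t X) {
  int32_t ShiftPair = int32_t(X << 24) >> 24;      // sra (shl X, 24), 24
  int32_t SextInReg = int32_t(int8_t(uint8_t(X))); // sign_extend_inreg to i8
  return ShiftPair == SextInReg;
}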
8971 SDValue DAGCombiner::visitSRL(SDNode *N) {
8972 SDValue N0 = N->getOperand(0);
8973 SDValue N1 = N->getOperand(1);
8974 if (SDValue V = DAG.simplifyShift(N0, N1))
8975 return V;
8977 EVT VT = N0.getValueType();
8978 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8980 // fold (srl c1, c2) -> c1 >>u c2
8981 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8982 return C;
8984 // fold vector ops
8985 if (VT.isVector())
8986 if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8987 return FoldedVOp;
8989 if (SDValue NewSel = foldBinOpIntoSelect(N))
8990 return NewSel;
8992 // if (srl x, c) is known to be zero, return 0
8993 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8994 if (N1C &&
8995 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8996 return DAG.getConstant(0, SDLoc(N), VT);
8998 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8999 if (N0.getOpcode() == ISD::SRL) {
9000 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9001 ConstantSDNode *RHS) {
9002 APInt c1 = LHS->getAPIntValue();
9003 APInt c2 = RHS->getAPIntValue();
9004 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9005 return (c1 + c2).uge(OpSizeInBits);
9007 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9008 return DAG.getConstant(0, SDLoc(N), VT);
9010 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9011 ConstantSDNode *RHS) {
9012 APInt c1 = LHS->getAPIntValue();
9013 APInt c2 = RHS->getAPIntValue();
9014 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9015 return (c1 + c2).ult(OpSizeInBits);
9017 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9018 SDLoc DL(N);
9019 EVT ShiftVT = N1.getValueType();
9020 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9021 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9025 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9026 N0.getOperand(0).getOpcode() == ISD::SRL) {
9027 SDValue InnerShift = N0.getOperand(0);
9028 // TODO - support non-uniform vector shift amounts.
9029 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9030 uint64_t c1 = N001C->getZExtValue();
9031 uint64_t c2 = N1C->getZExtValue();
9032 EVT InnerShiftVT = InnerShift.getValueType();
9033 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9034 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9035 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
9036 // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
9037 if (c1 + OpSizeInBits == InnerShiftSize) {
9038 SDLoc DL(N);
9039 if (c1 + c2 >= InnerShiftSize)
9040 return DAG.getConstant(0, DL, VT);
9041 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9042 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9043 InnerShift.getOperand(0), NewShiftAmt);
9044 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9046 // In the more general case, we can clear the high bits after the shift:
9047 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
9048 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9049 c1 + c2 < InnerShiftSize) {
9050 SDLoc DL(N);
9051 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9052 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9053 InnerShift.getOperand(0), NewShiftAmt);
9054 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9055 OpSizeInBits - c2),
9056 DL, InnerShiftVT);
9057 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9058 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9063 // fold (srl (shl x, c), c) -> (and x, cst2)
9064 // TODO - (srl (shl x, c1), c2).
9065 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9066 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9067 SDLoc DL(N);
9068 SDValue Mask =
9069 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9070 AddToWorklist(Mask.getNode());
9071 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9074 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9075 // TODO - support non-uniform vector shift amounts.
9076 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9077 // Shifting in all undef bits?
9078 EVT SmallVT = N0.getOperand(0).getValueType();
9079 unsigned BitSize = SmallVT.getScalarSizeInBits();
9080 if (N1C->getAPIntValue().uge(BitSize))
9081 return DAG.getUNDEF(VT);
9083 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9084 uint64_t ShiftAmt = N1C->getZExtValue();
9085 SDLoc DL0(N0);
9086 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9087 N0.getOperand(0),
9088 DAG.getConstant(ShiftAmt, DL0,
9089 getShiftAmountTy(SmallVT)));
9090 AddToWorklist(SmallShift.getNode());
9091 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9092 SDLoc DL(N);
9093 return DAG.getNode(ISD::AND, DL, VT,
9094 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9095 DAG.getConstant(Mask, DL, VT));
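// e.g. with x:i16 any-extended to i32 and a shift of 4:
// (srl (any_extend x), 4) --> (and (any_extend (srl x, 4)), 0x0FFFFFFF),
// where the mask re-zeroes the ShiftAmt high bits of the result.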
9099 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
9100 // bit, which is unmodified by sra.
9101 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9102 if (N0.getOpcode() == ISD::SRA)
9103 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9106 // fold (srl (ctlz x), log2(BitWidth)): the result is 1 iff x == 0; simplify it using what we know about the bits of x.
9107 if (N1C && N0.getOpcode() == ISD::CTLZ &&
9108 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9109 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9111 // If any of the input bits are KnownOne, then the input couldn't be all
9112 // zeros, thus the result of the srl will always be zero.
9113 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9115 // If all of the bits input to the ctlz node are known to be zero, then
9116 // the result of the ctlz is "32" and the result of the shift is one.
9117 APInt UnknownBits = ~Known.Zero;
9118 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9120 // Otherwise, check to see if there is exactly one bit input to the ctlz.
9121 if (UnknownBits.isPowerOf2()) {
9122 // Okay, we know that only the single bit specified by UnknownBits
9123 // could be set on input to the CTLZ node. If this bit is set, the SRL
9124 // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
9125 // to an SRL/XOR pair, which is likely to simplify more.
9126 unsigned ShAmt = UnknownBits.countTrailingZeros();
9127 SDValue Op = N0.getOperand(0);
9129 if (ShAmt) {
9130 SDLoc DL(N0);
9131 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9132 DAG.getConstant(ShAmt, DL,
9133 getShiftAmountTy(Op.getValueType())));
9134 AddToWorklist(Op.getNode());
9137 SDLoc DL(N);
9138 return DAG.getNode(ISD::XOR, DL, VT,
9139 Op, DAG.getConstant(1, DL, VT));
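// Illustrative example (i32): if only bit 3 of x can be nonzero, then
// (srl (ctlz x), 5) --> (xor (srl x, 3), 1): 0 when the bit is set, 1 when x == 0.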
9143 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9144 if (N1.getOpcode() == ISD::TRUNCATE &&
9145 N1.getOperand(0).getOpcode() == ISD::AND) {
9146 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9147 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9150 // fold operands of srl based on knowledge that the low bits are not
9151 // demanded.
9152 if (SimplifyDemandedBits(SDValue(N, 0)))
9153 return SDValue(N, 0);
9155 if (N1C && !N1C->isOpaque())
9156 if (SDValue NewSRL = visitShiftByConstant(N))
9157 return NewSRL;
9159 // Attempt to convert a srl of a load into a narrower zero-extending load.
9160 if (SDValue NarrowLoad = reduceLoadWidth(N))
9161 return NarrowLoad;
9163 // Here is a common situation. We want to optimize:
9165 // %a = ...
9166 // %b = and i32 %a, 2
9167 // %c = srl i32 %b, 1
9168 // brcond i32 %c ...
9170 // into
9172 // %a = ...
9173 // %b = and %a, 2
9174 // %c = setcc eq %b, 0
9175 // brcond %c ...
9177 // However, after the source operand of the SRL is optimized into an AND, the SRL
9178 // itself may not be optimized further. Look for it and add the BRCOND into
9179 // the worklist.
9180 if (N->hasOneUse()) {
9181 SDNode *Use = *N->use_begin();
9182 if (Use->getOpcode() == ISD::BRCOND)
9183 AddToWorklist(Use);
9184 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
9185 // Also look past the truncate.
9186 Use = *Use->use_begin();
9187 if (Use->getOpcode() == ISD::BRCOND)
9188 AddToWorklist(Use);
9192 // Try to transform this shift into a multiply-high if
9193 // it matches the appropriate pattern detected in combineShiftToMULH.
9194 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9195 return MULH;
9197 return SDValue();
9200 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9201 EVT VT = N->getValueType(0);
9202 SDValue N0 = N->getOperand(0);
9203 SDValue N1 = N->getOperand(1);
9204 SDValue N2 = N->getOperand(2);
9205 bool IsFSHL = N->getOpcode() == ISD::FSHL;
9206 unsigned BitWidth = VT.getScalarSizeInBits();
9208 // fold (fshl N0, N1, 0) -> N0
9209 // fold (fshr N0, N1, 0) -> N1
9210 if (isPowerOf2_32(BitWidth))
9211 if (DAG.MaskedValueIsZero(
9212 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9213 return IsFSHL ? N0 : N1;
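// e.g. for i32: (fshl x, y, 32) --> x and (fshr x, y, 32) --> y, since the
// shift amount is interpreted modulo the bit width.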
9215 auto IsUndefOrZero = [](SDValue V) {
9216 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9219 // TODO - support non-uniform vector shift amounts.
9220 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9221 EVT ShAmtTy = N2.getValueType();
9223 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9224 if (Cst->getAPIntValue().uge(BitWidth)) {
9225 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9226 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9227 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9230 unsigned ShAmt = Cst->getZExtValue();
9231 if (ShAmt == 0)
9232 return IsFSHL ? N0 : N1;
9234 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9235 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9236 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9237 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9238 if (IsUndefOrZero(N0))
9239 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9240 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9241 SDLoc(N), ShAmtTy));
9242 if (IsUndefOrZero(N1))
9243 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9244 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9245 SDLoc(N), ShAmtTy));
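// e.g. for i32: (fshl x, 0, 8) --> (shl x, 8) and (fshr 0, y, 8) --> (srl y, 8).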
9247 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9248 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9249 // TODO - big-endian support once we have test coverage.
9250 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9251 // TODO - permit LHS EXTLOAD if extensions are shifted out.
9252 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9253 !DAG.getDataLayout().isBigEndian()) {
9254 auto *LHS = dyn_cast<LoadSDNode>(N0);
9255 auto *RHS = dyn_cast<LoadSDNode>(N1);
9256 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9257 LHS->getAddressSpace() == RHS->getAddressSpace() &&
9258 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9259 ISD::isNON_EXTLoad(LHS)) {
9260 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9261 SDLoc DL(RHS);
9262 uint64_t PtrOff =
9263 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9264 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9265 bool Fast = false;
9266 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9267 RHS->getAddressSpace(), NewAlign,
9268 RHS->getMemOperand()->getFlags(), &Fast) &&
9269 Fast) {
9270 SDValue NewPtr = DAG.getMemBasePlusOffset(
9271 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9272 AddToWorklist(NewPtr.getNode());
9273 SDValue Load = DAG.getLoad(
9274 VT, DL, RHS->getChain(), NewPtr,
9275 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9276 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9277 // Replace the old load's chain with the new load's chain.
9278 WorklistRemover DeadNodes(*this);
9279 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9280 return Load;
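// Illustrative example (little-endian, i32): for (fshr hi, lo, 8) where lo is
// loaded from address p and hi from p + 4, the result is simply the i32 load
// from p + 1, i.e. PtrOff == ShAmt / 8 == 1.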
9287 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9288 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9289 // iff we know the shift amount is in range.
9290 // TODO: when is it worth doing SUB(BW, N2) as well?
9291 if (isPowerOf2_32(BitWidth)) {
9292 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9293 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9294 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9295 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9296 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9299 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9300 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9301 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
9302 // shift is legal as well, we might be better off avoiding non-constant (BW - N2).
9303 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9304 if (N0 == N1 && hasOperation(RotOpc, VT))
9305 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9307 // Simplify, based on bits shifted out of N0/N1.
9308 if (SimplifyDemandedBits(SDValue(N, 0)))
9309 return SDValue(N, 0);
9311 return SDValue();
9314 // Given an ABS node, detect the following pattern:
9315 // (ABS (SUB (EXTEND a), (EXTEND b))).
9316 // Generates a UABD/SABD instruction.
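// e.g. (abs (sub (zext i8 a), (zext i8 b))) --> (zext (abdu a, b)), assuming
// the target has made ABDU legal or custom for the narrow type.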
9317 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9318 const TargetLowering &TLI) {
9319 SDValue AbsOp1 = N->getOperand(0);
9320 SDValue Op0, Op1;
9322 if (AbsOp1.getOpcode() != ISD::SUB)
9323 return SDValue();
9325 Op0 = AbsOp1.getOperand(0);
9326 Op1 = AbsOp1.getOperand(1);
9328 unsigned Opc0 = Op0.getOpcode();
9329 // Check if the operands of the sub are (zero|sign)-extended.
9330 if (Opc0 != Op1.getOpcode() ||
9331 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9332 return SDValue();
9334 EVT VT1 = Op0.getOperand(0).getValueType();
9335 EVT VT2 = Op1.getOperand(0).getValueType();
9336 // Check if the operands are of same type and valid size.
9337 unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9338 if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9339 return SDValue();
9341 Op0 = Op0.getOperand(0);
9342 Op1 = Op1.getOperand(0);
9343 SDValue ABD =
9344 DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9345 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9348 SDValue DAGCombiner::visitABS(SDNode *N) {
9349 SDValue N0 = N->getOperand(0);
9350 EVT VT = N->getValueType(0);
9352 // fold (abs c1) -> c2
9353 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9354 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9355 // fold (abs (abs x)) -> (abs x)
9356 if (N0.getOpcode() == ISD::ABS)
9357 return N0;
9358 // fold (abs x) -> x iff not-negative
9359 if (DAG.SignBitIsZero(N0))
9360 return N0;
9362 if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9363 return ABD;
9365 return SDValue();
9368 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9369 SDValue N0 = N->getOperand(0);
9370 EVT VT = N->getValueType(0);
9372 // fold (bswap c1) -> c2
9373 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9374 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9375 // fold (bswap (bswap x)) -> x
9376 if (N0.getOpcode() == ISD::BSWAP)
9377 return N0->getOperand(0);
9378 return SDValue();
9381 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9382 SDValue N0 = N->getOperand(0);
9383 EVT VT = N->getValueType(0);
9385 // fold (bitreverse c1) -> c2
9386 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9387 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9388 // fold (bitreverse (bitreverse x)) -> x
9389 if (N0.getOpcode() == ISD::BITREVERSE)
9390 return N0.getOperand(0);
9391 return SDValue();
9394 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9395 SDValue N0 = N->getOperand(0);
9396 EVT VT = N->getValueType(0);
9398 // fold (ctlz c1) -> c2
9399 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9400 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9402 // If the value is known never to be zero, switch to the undef version.
9403 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9404 if (DAG.isKnownNeverZero(N0))
9405 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9408 return SDValue();
9411 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9412 SDValue N0 = N->getOperand(0);
9413 EVT VT = N->getValueType(0);
9415 // fold (ctlz_zero_undef c1) -> c2
9416 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9417 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9418 return SDValue();
9421 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9422 SDValue N0 = N->getOperand(0);
9423 EVT VT = N->getValueType(0);
9425 // fold (cttz c1) -> c2
9426 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9427 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9429 // If the value is known never to be zero, switch to the undef version.
9430 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9431 if (DAG.isKnownNeverZero(N0))
9432 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9435 return SDValue();
9438 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9439 SDValue N0 = N->getOperand(0);
9440 EVT VT = N->getValueType(0);
9442 // fold (cttz_zero_undef c1) -> c2
9443 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9444 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9445 return SDValue();
9448 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9449 SDValue N0 = N->getOperand(0);
9450 EVT VT = N->getValueType(0);
9452 // fold (ctpop c1) -> c2
9453 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9454 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9455 return SDValue();
9458 // FIXME: This should be checking for no signed zeros on individual operands, as
9459 // well as no NaNs.
9460 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9461 SDValue RHS,
9462 const TargetLowering &TLI) {
9463 const TargetOptions &Options = DAG.getTarget().Options;
9464 EVT VT = LHS.getValueType();
9466 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9467 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9468 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9471 /// Generate Min/Max node
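/// e.g. (select (setolt x, y), x, y) --> (fminnum_ieee x, y) or (fminnum x, y),
/// provided the caller has already proven the operands are never NaN.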
9472 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9473 SDValue RHS, SDValue True, SDValue False,
9474 ISD::CondCode CC, const TargetLowering &TLI,
9475 SelectionDAG &DAG) {
9476 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9477 return SDValue();
9479 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9480 switch (CC) {
9481 case ISD::SETOLT:
9482 case ISD::SETOLE:
9483 case ISD::SETLT:
9484 case ISD::SETLE:
9485 case ISD::SETULT:
9486 case ISD::SETULE: {
9487 // Since the operands are already known never to be NaN here, either fminnum
9488 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
9489 // expanded in terms of it.
9490 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9491 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9492 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9494 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9495 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9496 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9497 return SDValue();
9499 case ISD::SETOGT:
9500 case ISD::SETOGE:
9501 case ISD::SETGT:
9502 case ISD::SETGE:
9503 case ISD::SETUGT:
9504 case ISD::SETUGE: {
9505 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9506 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9507 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9509 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9510 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9511 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9512 return SDValue();
9514 default:
9515 return SDValue();
9519 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9520 /// the condition operand sign-bit across the value width and use it as a mask.
9521 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9522 SDValue Cond = N->getOperand(0);
9523 SDValue C1 = N->getOperand(1);
9524 SDValue C2 = N->getOperand(2);
9525 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9526 return SDValue();
9528 EVT VT = N->getValueType(0);
9529 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9530 VT != Cond.getOperand(0).getValueType())
9531 return SDValue();
9533 // The inverted-condition + commuted-select variants of these patterns are
9534 // canonicalized to these forms in IR.
9535 SDValue X = Cond.getOperand(0);
9536 SDValue CondC = Cond.getOperand(1);
9537 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9538 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9539 isAllOnesOrAllOnesSplat(C2)) {
9540 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9541 SDLoc DL(N);
9542 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9543 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9544 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9546 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9547 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9548 SDLoc DL(N);
9549 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9550 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9551 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9553 return SDValue();
9556 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9557 SDValue Cond = N->getOperand(0);
9558 SDValue N1 = N->getOperand(1);
9559 SDValue N2 = N->getOperand(2);
9560 EVT VT = N->getValueType(0);
9561 EVT CondVT = Cond.getValueType();
9562 SDLoc DL(N);
9564 if (!VT.isInteger())
9565 return SDValue();
9567 auto *C1 = dyn_cast<ConstantSDNode>(N1);
9568 auto *C2 = dyn_cast<ConstantSDNode>(N2);
9569 if (!C1 || !C2)
9570 return SDValue();
9572 // Only do this before legalization to avoid conflicting with target-specific
9573 // transforms in the other direction (create a select from a zext/sext). There
9574 // is also a target-independent combine here in DAGCombiner in the other
9575 // direction for (select Cond, -1, 0) when the condition is not i1.
9576 if (CondVT == MVT::i1 && !LegalOperations) {
9577 if (C1->isZero() && C2->isOne()) {
9578 // select Cond, 0, 1 --> zext (!Cond)
9579 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9580 if (VT != MVT::i1)
9581 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9582 return NotCond;
9584 if (C1->isZero() && C2->isAllOnes()) {
9585 // select Cond, 0, -1 --> sext (!Cond)
9586 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9587 if (VT != MVT::i1)
9588 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9589 return NotCond;
9591 if (C1->isOne() && C2->isZero()) {
9592 // select Cond, 1, 0 --> zext (Cond)
9593 if (VT != MVT::i1)
9594 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9595 return Cond;
9597 if (C1->isAllOnes() && C2->isZero()) {
9598 // select Cond, -1, 0 --> sext (Cond)
9599 if (VT != MVT::i1)
9600 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9601 return Cond;
9604 // Use a target hook because some targets may prefer to transform in the
9605 // other direction.
9606 if (TLI.convertSelectOfConstantsToMath(VT)) {
9607 // For any constants that differ by 1, we can transform the select into an
9608 // extend and add.
9609 const APInt &C1Val = C1->getAPIntValue();
9610 const APInt &C2Val = C2->getAPIntValue();
9611 if (C1Val - 1 == C2Val) {
9612 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9613 if (VT != MVT::i1)
9614 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9615 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9617 if (C1Val + 1 == C2Val) {
9618 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9619 if (VT != MVT::i1)
9620 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9621 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9624 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9625 if (C1Val.isPowerOf2() && C2Val.isZero()) {
9626 if (VT != MVT::i1)
9627 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9628 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9629 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9632 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9633 return V;
9636 return SDValue();
9639 // fold (select Cond, 0, 1) -> (xor Cond, 1)
9640 // We can't do this reliably if integer-based booleans have different contents
9641 // from floating-point-based booleans. This is because we can't tell whether we
9642 // have an integer-based boolean or a floating-point-based boolean unless we
9643 // can find the SETCC that produced it and inspect its operands. This is
9644 // fairly easy if C is the SETCC node, but it can potentially be
9645 // undiscoverable (or not reasonably discoverable). For example, it could be
9646 // in another basic block or it could require searching a complicated
9647 // expression.
9648 if (CondVT.isInteger() &&
9649 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9650 TargetLowering::ZeroOrOneBooleanContent &&
9651 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9652 TargetLowering::ZeroOrOneBooleanContent &&
9653 C1->isZero() && C2->isOne()) {
9654 SDValue NotCond =
9655 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9656 if (VT.bitsEq(CondVT))
9657 return NotCond;
9658 return DAG.getZExtOrTrunc(NotCond, DL, VT);
9661 return SDValue();
9664 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9665 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9666 "Expected a (v)select");
9667 SDValue Cond = N->getOperand(0);
9668 SDValue T = N->getOperand(1), F = N->getOperand(2);
9669 EVT VT = N->getValueType(0);
9670 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9671 return SDValue();
9673 // select Cond, Cond, F --> or Cond, F
9674 // select Cond, 1, F --> or Cond, F
9675 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9676 return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9678 // select Cond, T, Cond --> and Cond, T
9679 // select Cond, T, 0 --> and Cond, T
9680 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9681 return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9683 // select Cond, T, 1 --> or (not Cond), T
9684 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9685 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9686 return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9689 // select Cond, 0, F --> and (not Cond), F
9690 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9691 SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9692 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9695 return SDValue();
9698 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
9699 SDValue N0 = N->getOperand(0);
9700 SDValue N1 = N->getOperand(1);
9701 SDValue N2 = N->getOperand(2);
9702 EVT VT = N->getValueType(0);
9703 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
9704 return SDValue();
9706 SDValue Cond0 = N0.getOperand(0);
9707 SDValue Cond1 = N0.getOperand(1);
9708 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9709 if (VT != Cond0.getValueType())
9710 return SDValue();
9712 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
9713 // compare is inverted from that pattern ("Cond0 s> -1").
9714 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
9715 ; // This is the pattern we are looking for.
9716 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
9717 std::swap(N1, N2);
9718 else
9719 return SDValue();
9721 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
9722 if (isNullOrNullSplat(N2)) {
9723 SDLoc DL(N);
9724 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9725 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9726 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
9729 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
9730 if (isAllOnesOrAllOnesSplat(N1)) {
9731 SDLoc DL(N);
9732 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9733 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9734 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
9737 // If we have to invert the sign bit mask, only do that transform if the
9738 // target has a bitwise 'and not' instruction (the invert is free).
9739 // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
9740 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9741 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
9742 SDLoc DL(N);
9743 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
9744 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
9745 SDValue Not = DAG.getNOT(DL, Sra, VT);
9746 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
9749 // TODO: There's another pattern in this family, but it may require
9750 // implementing hasOrNot() to check for profitability:
9751 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
9753 return SDValue();
9756 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9757 SDValue N0 = N->getOperand(0);
9758 SDValue N1 = N->getOperand(1);
9759 SDValue N2 = N->getOperand(2);
9760 EVT VT = N->getValueType(0);
9761 EVT VT0 = N0.getValueType();
9762 SDLoc DL(N);
9763 SDNodeFlags Flags = N->getFlags();
9765 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9766 return V;
9768 if (SDValue V = foldSelectOfConstants(N))
9769 return V;
9771 if (SDValue V = foldBoolSelectToLogic(N, DAG))
9772 return V;
9774 // If we can fold this based on the true/false value, do so.
9775 if (SimplifySelectOps(N, N1, N2))
9776 return SDValue(N, 0); // Don't revisit N.
9778 if (VT0 == MVT::i1) {
9779 // The code in this block deals with the following 2 equivalences:
9780 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9781 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9782 // The target can specify its preferred form with the
9783 // shouldNormalizeToSelectSequence() callback. However, we always transform
9784 // to the right if we find that the inner select already exists in the DAG,
9785 // and we always transform to the left side if we know that we can further
9786 // optimize the combination of the conditions.
9787 bool normalizeToSequence =
9788 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9789 // select (and Cond0, Cond1), X, Y
9790 // -> select Cond0, (select Cond1, X, Y), Y
9791 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9792 SDValue Cond0 = N0->getOperand(0);
9793 SDValue Cond1 = N0->getOperand(1);
9794 SDValue InnerSelect =
9795 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9796 if (normalizeToSequence || !InnerSelect.use_empty())
9797 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9798 InnerSelect, N2, Flags);
9799 // Cleanup on failure.
9800 if (InnerSelect.use_empty())
9801 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9803 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9804 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9805 SDValue Cond0 = N0->getOperand(0);
9806 SDValue Cond1 = N0->getOperand(1);
9807 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9808 Cond1, N1, N2, Flags);
9809 if (normalizeToSequence || !InnerSelect.use_empty())
9810 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9811 InnerSelect, Flags);
9812 // Cleanup on failure.
9813 if (InnerSelect.use_empty())
9814 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9817 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9818 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9819 SDValue N1_0 = N1->getOperand(0);
9820 SDValue N1_1 = N1->getOperand(1);
9821 SDValue N1_2 = N1->getOperand(2);
9822 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9823 // Create the actual and node if we can generate good code for it.
9824 if (!normalizeToSequence) {
9825 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9826 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9827 N2, Flags);
9829 // Otherwise see if we can optimize the "and" to a better pattern.
9830 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9831 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9832 N2, Flags);
9836 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9837 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9838 SDValue N2_0 = N2->getOperand(0);
9839 SDValue N2_1 = N2->getOperand(1);
9840 SDValue N2_2 = N2->getOperand(2);
9841 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9842 // Create the actual or node if we can generate good code for it.
9843 if (!normalizeToSequence) {
9844 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9845 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9846 N2_2, Flags);
9848 // Otherwise see if we can optimize to a better pattern.
9849 if (SDValue Combined = visitORLike(N0, N2_0, N))
9850 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9851 N2_2, Flags);
9856 // select (not Cond), N1, N2 -> select Cond, N2, N1
9857 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9858 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9859 SelectOp->setFlags(Flags);
9860 return SelectOp;
9863 // Fold selects based on a setcc into other things, such as min/max/abs.
9864 if (N0.getOpcode() == ISD::SETCC) {
9865 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9866 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9868 // select (fcmp lt x, y), x, y -> fminnum x, y
9869 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9871 // This is OK if we don't care what happens if either operand is a NaN.
9872 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9873 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9874 CC, TLI, DAG))
9875 return FMinMax;
9877 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9878 // This is conservatively limited to pre-legal-operations to give targets
9879 // a chance to reverse the transform if they want to do that. Also, it is
9880 // unlikely that the pattern would be formed late, so it's probably not
9881 // worth going through the other checks.
9882 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9883 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9884 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9885 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9886 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9887 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9888 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9889 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9891 // The IR equivalent of this transform would have this form:
9892 // %a = add %x, C
9893 // %c = icmp ugt %x, ~C
9894 // %r = select %c, -1, %a
9895 // =>
9896 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9897 // %u0 = extractvalue %u, 0
9898 // %u1 = extractvalue %u, 1
9899 // %r = select %u1, -1, %u0
9900 SDVTList VTs = DAG.getVTList(VT, VT0);
9901 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9902 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9906 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9907 (!LegalOperations &&
9908 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9909 // Any flags available in a select/setcc fold will be on the setcc as they
9910 // migrated from the fcmp.
9911 Flags = N0.getNode()->getFlags();
9912 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9913 N2, N0.getOperand(2));
9914 SelectNode->setFlags(Flags);
9915 return SelectNode;
9918 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9919 return NewSel;
9922 if (!VT.isVector())
9923 if (SDValue BinOp = foldSelectOfBinops(N))
9924 return BinOp;
9926 return SDValue();
9929 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9930 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
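// e.g. (vselect <0,0,-1,-1>, (concat A, B), (concat C, D)) --> (concat C, B).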
9931 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9932 SDLoc DL(N);
9933 SDValue Cond = N->getOperand(0);
9934 SDValue LHS = N->getOperand(1);
9935 SDValue RHS = N->getOperand(2);
9936 EVT VT = N->getValueType(0);
9937 int NumElems = VT.getVectorNumElements();
9938 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9939 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9940 Cond.getOpcode() == ISD::BUILD_VECTOR);
9942 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
9943 // about the binary (two-operand) case here.
9944 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9945 return SDValue();
9947 // We're sure we have an even number of elements due to the
9948 // concat_vectors we have as arguments to vselect.
9949 // Skip BV elements until we find one that's not an UNDEF.
9950 // After we find a non-UNDEF element, keep looping until we get to half the
9951 // length of the BV and see if all the non-undef nodes are the same.
9952 ConstantSDNode *BottomHalf = nullptr;
9953 for (int i = 0; i < NumElems / 2; ++i) {
9954 if (Cond->getOperand(i)->isUndef())
9955 continue;
9957 if (BottomHalf == nullptr)
9958 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9959 else if (Cond->getOperand(i).getNode() != BottomHalf)
9960 return SDValue();
9963 // Do the same for the second half of the BuildVector
9964 ConstantSDNode *TopHalf = nullptr;
9965 for (int i = NumElems / 2; i < NumElems; ++i) {
9966 if (Cond->getOperand(i)->isUndef())
9967 continue;
9969 if (TopHalf == nullptr)
9970 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9971 else if (Cond->getOperand(i).getNode() != TopHalf)
9972 return SDValue();
9975 assert(TopHalf && BottomHalf &&
9976 "One half of the selector was all UNDEFs and the other was all the "
9977 "same value. This should have been addressed before this function.");
9978 return DAG.getNode(
9979 ISD::CONCAT_VECTORS, DL, VT,
9980 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
9981 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
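// Recognize a gather/scatter index of the form (add (splat X), Y) when the
// current base pointer is null: use X as the uniform scalar base and Y as the
// remaining per-lane index.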
9984 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9985 if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9986 return false;
9988 // For now we check only the LHS of the add.
9989 SDValue LHS = Index.getOperand(0);
9990 SDValue SplatVal = DAG.getSplatValue(LHS);
9991 if (!SplatVal)
9992 return false;
9994 BasePtr = SplatVal;
9995 Index = Index.getOperand(1);
9996 return true;
9999 // Fold sext/zext of index into index type.
10000 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
10001 bool Scaled, SelectionDAG &DAG) {
10002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10004 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10005 SDValue Op = Index.getOperand(0);
10006 MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
10007 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10008 Index = Op;
10009 return true;
10013 if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10014 SDValue Op = Index.getOperand(0);
10015 MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
10016 if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10017 Index = Op;
10018 return true;
10022 return false;
10025 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10026 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10027 SDValue Mask = MSC->getMask();
10028 SDValue Chain = MSC->getChain();
10029 SDValue Index = MSC->getIndex();
10030 SDValue Scale = MSC->getScale();
10031 SDValue StoreVal = MSC->getValue();
10032 SDValue BasePtr = MSC->getBasePtr();
10033 SDLoc DL(N);
10035 // Zap scatters with a zero mask.
10036 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10037 return Chain;
10039 if (refineUniformBase(BasePtr, Index, DAG)) {
10040 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10041 return DAG.getMaskedScatter(
10042 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10043 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10046 if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10047 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10048 return DAG.getMaskedScatter(
10049 DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10050 MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10053 return SDValue();
10056 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10057 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10058 SDValue Mask = MST->getMask();
10059 SDValue Chain = MST->getChain();
10060 SDValue Value = MST->getValue();
10061 SDValue Ptr = MST->getBasePtr();
10062 SDLoc DL(N);
10064 // Zap masked stores with a zero mask.
10065 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10066 return Chain;
10068 // If this is a masked store with an all-ones mask, we can use an unmasked store.
10069 // FIXME: Can we do this for indexed, compressing, or truncating stores?
10070 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10071 !MST->isCompressingStore() && !MST->isTruncatingStore())
10072 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10073 MST->getBasePtr(), MST->getPointerInfo(),
10074 MST->getOriginalAlign(), MachineMemOperand::MOStore,
10075 MST->getAAInfo());
10077 // Try transforming N to an indexed store.
10078 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10079 return SDValue(N, 0);
10081 if (MST->isTruncatingStore() && MST->isUnindexed() &&
10082 Value.getValueType().isInteger() &&
10083 (!isa<ConstantSDNode>(Value) ||
10084 !cast<ConstantSDNode>(Value)->isOpaque())) {
10085 APInt TruncDemandedBits =
10086 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10087 MST->getMemoryVT().getScalarSizeInBits());
10089 // See if we can simplify the operation with
10090 // SimplifyDemandedBits, which only works if the value has a single use.
10091 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
10092 // Re-visit the store if anything changed and the store hasn't been merged
10093 // with another node (N is deleted). SimplifyDemandedBits will add Value's
10094 // node back to the worklist if necessary, but we also need to re-visit
10095 // the Store node itself.
10096 if (N->getOpcode() != ISD::DELETED_NODE)
10097 AddToWorklist(N);
10098 return SDValue(N, 0);
10102 // If this is a TRUNC followed by a masked store, fold this into a masked
10103 // truncating store. We can do this even if this is already a masked
10104 // truncstore.
10105 if ((Value.getOpcode() == ISD::TRUNCATE) && Value.getNode()->hasOneUse() &&
10106 MST->isUnindexed() &&
10107 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10108 MST->getMemoryVT(), LegalOperations)) {
10109 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10110 Value.getOperand(0).getValueType());
10111 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10112 MST->getOffset(), Mask, MST->getMemoryVT(),
10113 MST->getMemOperand(), MST->getAddressingMode(),
10114 /*IsTruncating=*/true);
10117 return SDValue();
10120 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10121 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10122 SDValue Mask = MGT->getMask();
10123 SDValue Chain = MGT->getChain();
10124 SDValue Index = MGT->getIndex();
10125 SDValue Scale = MGT->getScale();
10126 SDValue PassThru = MGT->getPassThru();
10127 SDValue BasePtr = MGT->getBasePtr();
10128 SDLoc DL(N);
10130 // Zap gathers with a zero mask.
10131 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10132 return CombineTo(N, PassThru, MGT->getChain());
10134 if (refineUniformBase(BasePtr, Index, DAG)) {
10135 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10136 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10137 MGT->getMemoryVT(), DL, Ops,
10138 MGT->getMemOperand(), MGT->getIndexType(),
10139 MGT->getExtensionType());
10142 if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10143 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10144 return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10145 MGT->getMemoryVT(), DL, Ops,
10146 MGT->getMemOperand(), MGT->getIndexType(),
10147 MGT->getExtensionType());
10150 return SDValue();
10153 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10154 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10155 SDValue Mask = MLD->getMask();
10156 SDLoc DL(N);
10158 // Zap masked loads with a zero mask.
10159 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10160 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10162 // If this is a masked load with an all-ones mask, we can use an unmasked load.
10163 // FIXME: Can we do this for indexed, expanding, or extending loads?
10164 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10165 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10166 SDValue NewLd = DAG.getLoad(
10167 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10168 MLD->getPointerInfo(), MLD->getOriginalAlign(),
10169 MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10170 return CombineTo(N, NewLd, NewLd.getValue(1));
10173 // Try transforming N to an indexed load.
10174 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10175 return SDValue(N, 0);
10177 return SDValue();
10180 /// A vector select of 2 constant vectors can be simplified to math/logic to
10181 /// avoid a variable select instruction and possibly avoid constant loads.
10182 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10183 SDValue Cond = N->getOperand(0);
10184 SDValue N1 = N->getOperand(1);
10185 SDValue N2 = N->getOperand(2);
10186 EVT VT = N->getValueType(0);
10187 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10188 !TLI.convertSelectOfConstantsToMath(VT) ||
10189 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10190 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10191 return SDValue();
10193 // Check if we can use the condition value to increment/decrement a single
10194 // constant value. This simplifies a select to an add and removes a constant
10195 // load/materialization from the general case.
10196 bool AllAddOne = true;
10197 bool AllSubOne = true;
10198 unsigned Elts = VT.getVectorNumElements();
10199 for (unsigned i = 0; i != Elts; ++i) {
10200 SDValue N1Elt = N1.getOperand(i);
10201 SDValue N2Elt = N2.getOperand(i);
10202 if (N1Elt.isUndef() || N2Elt.isUndef())
10203 continue;
10204 if (N1Elt.getValueType() != N2Elt.getValueType())
10205 continue;
10207 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10208 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10209 if (C1 != C2 + 1)
10210 AllAddOne = false;
10211 if (C1 != C2 - 1)
10212 AllSubOne = false;
10215 // Further simplifications for the extra-special cases where the constants are
10216 // all 0 or all -1 should be implemented as folds of these patterns.
10217 SDLoc DL(N);
10218 if (AllAddOne || AllSubOne) {
10219 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10220 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10221 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10222 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10223 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
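// e.g. (vselect Cond, <5, 9>, <4, 8>) --> (add (zext Cond), <4, 8>), since each
// true-arm constant is exactly one greater than the matching false-arm constant.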
10226 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
10227 APInt Pow2C;
10228 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10229 isNullOrNullSplat(N2)) {
10230 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10231 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10232 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10235 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10236 return V;
10238 // The general case for select-of-constants:
10239 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10240 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10241 // leave that to a machine-specific pass.
10242 return SDValue();
10245 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10246 SDValue N0 = N->getOperand(0);
10247 SDValue N1 = N->getOperand(1);
10248 SDValue N2 = N->getOperand(2);
10249 EVT VT = N->getValueType(0);
10250 SDLoc DL(N);
10252 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10253 return V;
10255 if (SDValue V = foldBoolSelectToLogic(N, DAG))
10256 return V;
10258 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10259 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10260 return DAG.getSelect(DL, VT, F, N2, N1);
10262 // Canonicalize integer abs.
10263 // vselect (setg[te] X, 0), X, -X ->
10264 // vselect (setgt X, -1), X, -X ->
10265 // vselect (setl[te] X, 0), -X, X ->
10266 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10267 if (N0.getOpcode() == ISD::SETCC) {
10268 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10269 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10270 bool isAbs = false;
10271 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10273 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10274 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10275 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10276 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10277 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10278 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10279 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10281 if (isAbs) {
10282 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10283 return DAG.getNode(ISD::ABS, DL, VT, LHS);
10285 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10286 DAG.getConstant(VT.getScalarSizeInBits() - 1,
10287 DL, getShiftAmountTy(VT)));
10288 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10289 AddToWorklist(Shift.getNode());
10290 AddToWorklist(Add.getNode());
10291 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10294 // vselect x, y (fcmp lt x, y) -> fminnum x, y
10295 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10297 // This is OK if we don't care about what happens if either operand is a
10298 // NaN.
10300 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10301 if (SDValue FMinMax =
10302 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10303 return FMinMax;
10306 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10307 return S;
10309 // If this select has a condition (setcc) with narrower operands than the
10310 // select, try to widen the compare to match the select width.
10311 // TODO: This should be extended to handle any constant.
10312 // TODO: This could be extended to handle non-loading patterns, but that
10313 // requires thorough testing to avoid regressions.
10314 if (isNullOrNullSplat(RHS)) {
10315 EVT NarrowVT = LHS.getValueType();
10316 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10317 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10318 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10319 unsigned WideWidth = WideVT.getScalarSizeInBits();
10320 bool IsSigned = isSignedIntSetCC(CC);
10321 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10322 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10323 SetCCWidth != 1 && SetCCWidth < WideWidth &&
10324 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10325 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10326 // Both compare operands can be widened for free. The LHS can use an
10327 // extended load, and the RHS is a constant:
10328 // vselect (ext (setcc load(X), C)), N1, N2 -->
10329 // vselect (setcc extload(X), C'), N1, N2
10330 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10331 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10332 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10333 EVT WideSetCCVT = getSetCCResultType(WideVT);
10334 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10335 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10339 // Match VSELECTs into add with unsigned saturation.
10340 if (hasOperation(ISD::UADDSAT, VT)) {
10341 // Check if one of the arms of the VSELECT is a vector with all bits set.
10342 // If it's on the left side invert the predicate to simplify logic below.
10343 SDValue Other;
10344 ISD::CondCode SatCC = CC;
10345 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10346 Other = N2;
10347 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10348 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10349 Other = N1;
10352 if (Other && Other.getOpcode() == ISD::ADD) {
10353 SDValue CondLHS = LHS, CondRHS = RHS;
10354 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10356 // Canonicalize condition operands.
10357 if (SatCC == ISD::SETUGE) {
10358 std::swap(CondLHS, CondRHS);
10359 SatCC = ISD::SETULE;
10362 // We can test against either of the addition operands.
10363 // x <= x+y ? x+y : ~0 --> uaddsat x, y
10364 // x+y >= x ? x+y : ~0 --> uaddsat x, y
10365 if (SatCC == ISD::SETULE && Other == CondRHS &&
10366 (OpLHS == CondLHS || OpRHS == CondLHS))
10367 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10369 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10370 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10371 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10372 CondLHS == OpLHS) {
10373 // If the RHS is a constant we have to reverse the const
10374 // canonicalization.
10375 // x >= ~C ? x+C : ~0 --> uaddsat x, C
10376 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10377 return Cond->getAPIntValue() == ~Op->getAPIntValue();
10379 if (SatCC == ISD::SETULE &&
10380 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10381 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10386 // Match VSELECTs into sub with unsigned saturation.
10387 if (hasOperation(ISD::USUBSAT, VT)) {
10388 // Check if one of the arms of the VSELECT is a zero vector. If it's on
10389 // the left side invert the predicate to simplify logic below.
10390 SDValue Other;
10391 ISD::CondCode SatCC = CC;
10392 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10393 Other = N2;
10394 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10395 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10396 Other = N1;
10399 if (Other && Other.getNumOperands() == 2) {
10400 SDValue CondRHS = RHS;
10401 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10403 if (Other.getOpcode() == ISD::SUB &&
10404 LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10405 OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10406 // Look for a general sub with unsigned saturation first.
10407 // zext(x) >= y ? x - trunc(y) : 0
10408 // --> usubsat(x,trunc(umin(y,SatLimit)))
10409 // zext(x) > y ? x - trunc(y) : 0
10410 // --> usubsat(x,trunc(umin(y,SatLimit)))
10411 if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10412 return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10413 DL);
10416 if (OpLHS == LHS) {
10417 // Look for a general sub with unsigned saturation first.
10418 // x >= y ? x-y : 0 --> usubsat x, y
10419 // x > y ? x-y : 0 --> usubsat x, y
10420 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10421 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10422 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10424 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10425 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10426 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10427 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10428 // If the RHS is a constant we have to reverse the const
10429 // canonicalization.
10430 // x > C-1 ? x+-C : 0 --> usubsat x, C
10431 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10432 return (!Op && !Cond) ||
10433 (Op && Cond &&
10434 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10436 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10437 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10438 /*AllowUndefs*/ true)) {
10439 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10440 DAG.getConstant(0, DL, VT), OpRHS);
10441 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10444 // Another special case: If C was a sign bit, the sub has been
10445 // canonicalized into a xor.
10446 // FIXME: Would it be better to use computeKnownBits to determine
10447 // whether it's safe to decanonicalize the xor?
10448 // x s< 0 ? x^C : 0 --> usubsat x, C
10449 APInt SplatValue;
10450 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10451 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10452 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10453 SplatValue.isSignMask()) {
10454 // Note that we have to rebuild the RHS constant here to
10455 // ensure we don't rely on particular values of undef lanes.
10456 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10457 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10466 if (SimplifySelectOps(N, N1, N2))
10467 return SDValue(N, 0); // Don't revisit N.
10469 // Fold (vselect all_ones, N1, N2) -> N1
10470 if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10471 return N1;
10472 // Fold (vselect all_zeros, N1, N2) -> N2
10473 if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10474 return N2;
10476 // The ConvertSelectToConcatVector function assumes both the above
10477 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10478 // and addressed.
10479 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10480 N2.getOpcode() == ISD::CONCAT_VECTORS &&
10481 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10482 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10483 return CV;
10486 if (SDValue V = foldVSelectOfConstants(N))
10487 return V;
10489 if (hasOperation(ISD::SRA, VT))
10490 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10491 return V;
10493 return SDValue();
10496 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10497 SDValue N0 = N->getOperand(0);
10498 SDValue N1 = N->getOperand(1);
10499 SDValue N2 = N->getOperand(2);
10500 SDValue N3 = N->getOperand(3);
10501 SDValue N4 = N->getOperand(4);
10502 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10504 // fold select_cc lhs, rhs, x, x, cc -> x
10505 if (N2 == N3)
10506 return N2;
10508 // Determine if the condition we're dealing with is constant
10509 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10510 CC, SDLoc(N), false)) {
10511 AddToWorklist(SCC.getNode());
10513 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10514 if (!SCCC->isZero())
10515 return N2; // cond always true -> true val
10516 else
10517 return N3; // cond always false -> false val
10518 } else if (SCC->isUndef()) {
10519 // When the condition is UNDEF, just return the first operand. This is
10520 // coherent with the DAG creation; no setcc node is created in this case.
10521 return N2;
10522 } else if (SCC.getOpcode() == ISD::SETCC) {
10523 // Fold to a simpler select_cc
10524 SDValue SelectOp = DAG.getNode(
10525 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10526 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10527 SelectOp->setFlags(SCC->getFlags());
10528 return SelectOp;
10532 // If we can fold this based on the true/false value, do so.
10533 if (SimplifySelectOps(N, N2, N3))
10534 return SDValue(N, 0); // Don't revisit N.
10536 // fold select_cc into other things, such as min/max/abs
10537 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10540 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10541 // setcc is very commonly used as an argument to brcond. This pattern
10542 // also lends itself to numerous combines and, as a result, it is desirable
10543 // to keep the argument to a brcond as a setcc as much as possible.
10544 bool PreferSetCC =
10545 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10547 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10548 EVT VT = N->getValueType(0);
10550 // SETCC(FREEZE(X), CONST, Cond)
10551 // =>
10552 // FREEZE(SETCC(X, CONST, Cond))
10553 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10554 // isn't equivalent to true or false.
10555 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10556 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10558 // This transformation is beneficial because visitBRCOND can fold
10559 // BRCOND(FREEZE(X)) to BRCOND(X).
10561 // Conservatively optimize integer comparisons only.
10562 if (PreferSetCC) {
10563 // Do this only when SETCC is going to be used by BRCOND.
10565 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10566 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10567 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10568 bool Updated = false;
10570 // Is 'X Cond C' always true or false?
10571 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10572 bool False = (Cond == ISD::SETULT && C->isZero()) ||
10573 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
10574 (Cond == ISD::SETUGT && C->isAllOnes()) ||
10575 (Cond == ISD::SETGT && C->isMaxSignedValue());
10576 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
10577 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
10578 (Cond == ISD::SETUGE && C->isZero()) ||
10579 (Cond == ISD::SETGE && C->isMinSignedValue());
10580 return True || False;
10583 if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10584 if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10585 N0 = N0->getOperand(0);
10586 Updated = true;
10589 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10590 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10591 N0C)) {
10592 N1 = N1->getOperand(0);
10593 Updated = true;
10597 if (Updated)
10598 return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10601 SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10602 SDLoc(N), !PreferSetCC);
10604 if (!Combined)
10605 return SDValue();
10607 // If we prefer to have a setcc, and we don't, we'll try our best to
10608 // recreate one using rebuildSetCC.
10609 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10610 SDValue NewSetCC = rebuildSetCC(Combined);
10612 // We don't have anything interesting to combine to.
10613 if (NewSetCC.getNode() == N)
10614 return SDValue();
10616 if (NewSetCC)
10617 return NewSetCC;
10620 return Combined;
10623 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10624 SDValue LHS = N->getOperand(0);
10625 SDValue RHS = N->getOperand(1);
10626 SDValue Carry = N->getOperand(2);
10627 SDValue Cond = N->getOperand(3);
10629 // If Carry is false, fold to a regular SETCC.
10630 if (isNullConstant(Carry))
10631 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10633 return SDValue();
10636 /// Check if N satisfies:
10637 /// N is used once.
10638 /// N is a Load.
10639 ///   The load is compatible with ExtOpcode: if the load has an explicit
10640 ///     zero/sign extension, ExtOpcode must perform the same kind of extension;
10641 ///     otherwise (a plain or any-extending load) any ExtOpcode is considered
10642 ///     compatible.
10643 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10644 if (!N.hasOneUse())
10645 return false;
10647 if (!isa<LoadSDNode>(N))
10648 return false;
10650 LoadSDNode *Load = cast<LoadSDNode>(N);
10651 ISD::LoadExtType LoadExt = Load->getExtensionType();
10652 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10653 return true;
10655   // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must have the same
10656 // extension.
10657 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10658 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10659 return false;
10661 return true;
10664 /// Fold
10665 /// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10666 /// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10667 /// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10668 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10669 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
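/// For example (illustrative), with a one-use select of two simple i8 loads:
///   (i32 (sext (i8 (select c, (load x), (load y)))))
/// can become
///   (i32 (select c, (sextload x), (sextload y)))
/// provided the target reports the corresponding extending load as legal.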
10670 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10671 SelectionDAG &DAG) {
10672 unsigned Opcode = N->getOpcode();
10673 SDValue N0 = N->getOperand(0);
10674 EVT VT = N->getValueType(0);
10675 SDLoc DL(N);
10677 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10678 Opcode == ISD::ANY_EXTEND) &&
10679 "Expected EXTEND dag node in input!");
10681 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10682 !N0.hasOneUse())
10683 return SDValue();
10685 SDValue Op1 = N0->getOperand(1);
10686 SDValue Op2 = N0->getOperand(2);
10687 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10688 return SDValue();
10690 auto ExtLoadOpcode = ISD::EXTLOAD;
10691 if (Opcode == ISD::SIGN_EXTEND)
10692 ExtLoadOpcode = ISD::SEXTLOAD;
10693 else if (Opcode == ISD::ZERO_EXTEND)
10694 ExtLoadOpcode = ISD::ZEXTLOAD;
10696 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10697 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10698 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10699 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10700 return SDValue();
10702 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10703 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10704 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10707 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10708 /// a build_vector of constants.
10709 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10710 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10711 /// Vector extends are not folded if operations are legal; this is to
10712 /// avoid introducing illegal build_vector dag nodes.
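/// For example (illustrative):
///   (i32 (zext (i8 Constant<42>)))     --> (i32 Constant<42>)
///   (v4i32 (sext (v4i8 build_vector))) --> (v4i32 build_vector) of the
///                                          sign-extended elements
/// The vector case is skipped after type legalization if the scalar result
/// type is not legal, to avoid creating an illegal build_vector.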
10713 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10714 SelectionDAG &DAG, bool LegalTypes) {
10715 unsigned Opcode = N->getOpcode();
10716 SDValue N0 = N->getOperand(0);
10717 EVT VT = N->getValueType(0);
10718 SDLoc DL(N);
10720 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10721 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10722 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10723 && "Expected EXTEND dag node in input!");
10725 // fold (sext c1) -> c1
10726 // fold (zext c1) -> c1
10727 // fold (aext c1) -> c1
10728 if (isa<ConstantSDNode>(N0))
10729 return DAG.getNode(Opcode, DL, VT, N0);
10731 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10732 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10733 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10734 if (N0->getOpcode() == ISD::SELECT) {
10735 SDValue Op1 = N0->getOperand(1);
10736 SDValue Op2 = N0->getOperand(2);
10737 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10738 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10739 // For any_extend, choose sign extension of the constants to allow a
10740       // possible further transform to sign_extend_inreg, i.e.:
10742 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10743 // t2: i64 = any_extend t1
10744 // -->
10745 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10746 // -->
10747 // t4: i64 = sign_extend_inreg t3
10748 unsigned FoldOpc = Opcode;
10749 if (FoldOpc == ISD::ANY_EXTEND)
10750 FoldOpc = ISD::SIGN_EXTEND;
10751 return DAG.getSelect(DL, VT, N0->getOperand(0),
10752 DAG.getNode(FoldOpc, DL, VT, Op1),
10753 DAG.getNode(FoldOpc, DL, VT, Op2));
10757   // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
10758   // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
10759   // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10760 EVT SVT = VT.getScalarType();
10761 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10762 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10763 return SDValue();
10765 // We can fold this node into a build_vector.
10766 unsigned VTBits = SVT.getSizeInBits();
10767 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10768 SmallVector<SDValue, 8> Elts;
10769 unsigned NumElts = VT.getVectorNumElements();
10771   // For zero-extensions, UNDEF elements are still guaranteed to have their
10772   // upper bits set to zero.
10773 bool IsZext =
10774 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10776 for (unsigned i = 0; i != NumElts; ++i) {
10777 SDValue Op = N0.getOperand(i);
10778 if (Op.isUndef()) {
10779 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10780 continue;
10783 SDLoc DL(Op);
10784     // Get the constant value and, if needed, truncate it to the type's size.
10785 // Nodes like build_vector might have constants wider than the scalar type.
10786 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10787 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10788 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10789 else
10790 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10793 return DAG.getBuildVector(VT, DL, Elts);
10796 // ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
10797 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10798 // transformation. Returns true if extending the uses is possible and the
10799 // above-mentioned transformation is profitable.
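// Roughly: setcc users of the load can be extended alongside it (and are
// collected in ExtendNodes), while any other user requires a free truncate
// back to the original type for the transformation to be worthwhile.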
10800 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10801 unsigned ExtOpc,
10802 SmallVectorImpl<SDNode *> &ExtendNodes,
10803 const TargetLowering &TLI) {
10804 bool HasCopyToRegUses = false;
10805 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10806 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10807 UE = N0.getNode()->use_end();
10808 UI != UE; ++UI) {
10809 SDNode *User = *UI;
10810 if (User == N)
10811 continue;
10812 if (UI.getUse().getResNo() != N0.getResNo())
10813 continue;
10814 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10815 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10816 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10817 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10818 // Sign bits will be lost after a zext.
10819 return false;
10820 bool Add = false;
10821 for (unsigned i = 0; i != 2; ++i) {
10822 SDValue UseOp = User->getOperand(i);
10823 if (UseOp == N0)
10824 continue;
10825 if (!isa<ConstantSDNode>(UseOp))
10826 return false;
10827 Add = true;
10829 if (Add)
10830 ExtendNodes.push_back(User);
10831 continue;
10833 // If truncates aren't free and there are users we can't
10834 // extend, it isn't worthwhile.
10835 if (!isTruncFree)
10836 return false;
10837 // Remember if this value is live-out.
10838 if (User->getOpcode() == ISD::CopyToReg)
10839 HasCopyToRegUses = true;
10842 if (HasCopyToRegUses) {
10843 bool BothLiveOut = false;
10844 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10845 UI != UE; ++UI) {
10846 SDUse &Use = UI.getUse();
10847 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10848 BothLiveOut = true;
10849 break;
10852 if (BothLiveOut)
10853 // Both unextended and extended values are live out. There had better be
10854 // a good reason for the transformation.
10855 return ExtendNodes.size();
10857 return true;
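/// Rewrite the setcc nodes collected by ExtendUsesToFormExtLoad so that they
/// compare the newly created extended load (and extended copies of their other
/// operands) instead of the original narrow load.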
10860 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10861 SDValue OrigLoad, SDValue ExtLoad,
10862 ISD::NodeType ExtType) {
10863 // Extend SetCC uses if necessary.
10864 SDLoc DL(ExtLoad);
10865 for (SDNode *SetCC : SetCCs) {
10866 SmallVector<SDValue, 4> Ops;
10868 for (unsigned j = 0; j != 2; ++j) {
10869 SDValue SOp = SetCC->getOperand(j);
10870 if (SOp == OrigLoad)
10871 Ops.push_back(ExtLoad);
10872 else
10873 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10876 Ops.push_back(SetCC->getOperand(2));
10877 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10881 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10882 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10883 SDValue N0 = N->getOperand(0);
10884 EVT DstVT = N->getValueType(0);
10885 EVT SrcVT = N0.getValueType();
10887 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10888 N->getOpcode() == ISD::ZERO_EXTEND) &&
10889 "Unexpected node type (not an extend)!");
10891 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10892 // For example, on a target with legal v4i32, but illegal v8i32, turn:
10893 // (v8i32 (sext (v8i16 (load x))))
10894 // into:
10895 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10896 // (v4i32 (sextload (x + 16)))))
10897 // Where uses of the original load, i.e.:
10898 // (v8i16 (load x))
10899 // are replaced with:
10900 // (v8i16 (truncate
10901 // (v8i32 (concat_vectors (v4i32 (sextload x)),
10902 // (v4i32 (sextload (x + 16)))))))
10904 // This combine is only applicable to illegal, but splittable, vectors.
10905 // All legal types, and illegal non-vector types, are handled elsewhere.
10906 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10908 if (N0->getOpcode() != ISD::LOAD)
10909 return SDValue();
10911 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10913 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10914 !N0.hasOneUse() || !LN0->isSimple() ||
10915 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10916 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10917 return SDValue();
10919 SmallVector<SDNode *, 4> SetCCs;
10920 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10921 return SDValue();
10923 ISD::LoadExtType ExtType =
10924 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10926 // Try to split the vector types to get down to legal types.
10927 EVT SplitSrcVT = SrcVT;
10928 EVT SplitDstVT = DstVT;
10929 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10930 SplitSrcVT.getVectorNumElements() > 1) {
10931 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10932 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10935 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10936 return SDValue();
10938 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10940 SDLoc DL(N);
10941 const unsigned NumSplits =
10942 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10943 const unsigned Stride = SplitSrcVT.getStoreSize();
10944 SmallVector<SDValue, 4> Loads;
10945 SmallVector<SDValue, 4> Chains;
10947 SDValue BasePtr = LN0->getBasePtr();
10948 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10949 const unsigned Offset = Idx * Stride;
10950 const Align Align = commonAlignment(LN0->getAlign(), Offset);
10952 SDValue SplitLoad = DAG.getExtLoad(
10953 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10954 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10955 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10957 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10959 Loads.push_back(SplitLoad.getValue(0));
10960 Chains.push_back(SplitLoad.getValue(1));
10963 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10964 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10966   // Simplify the new TokenFactor.
10967 AddToWorklist(NewChain.getNode());
10969 CombineTo(N, NewValue);
10971 // Replace uses of the original load (before extension)
10972 // with a truncate of the concatenated sextloaded vectors.
10973 SDValue Trunc =
10974 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10975 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10976 CombineTo(N0.getNode(), Trunc, NewChain);
10977 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10980 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10981 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
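// For example (illustrative), on a target where an i16->i32 zextload is legal:
//   (i32 (zext (i16 (and (i16 (srl (i16 (load x)), 2)), 0xff))))
// can become
//   (i32 (and (i32 (srl (i32 (zextload x)), 2)), 0xff))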
10982 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10983 assert(N->getOpcode() == ISD::ZERO_EXTEND);
10984 EVT VT = N->getValueType(0);
10985 EVT OrigVT = N->getOperand(0).getValueType();
10986 if (TLI.isZExtFree(OrigVT, VT))
10987 return SDValue();
10989 // and/or/xor
10990 SDValue N0 = N->getOperand(0);
10991 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10992 N0.getOpcode() == ISD::XOR) ||
10993 N0.getOperand(1).getOpcode() != ISD::Constant ||
10994 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10995 return SDValue();
10997 // shl/shr
10998 SDValue N1 = N0->getOperand(0);
10999 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11000 N1.getOperand(1).getOpcode() != ISD::Constant ||
11001 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11002 return SDValue();
11004 // load
11005 if (!isa<LoadSDNode>(N1.getOperand(0)))
11006 return SDValue();
11007 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11008 EVT MemVT = Load->getMemoryVT();
11009 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11010 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
11011 return SDValue();
11014 // If the shift op is SHL, the logic op must be AND, otherwise the result
11015 // will be wrong.
11016 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11017 return SDValue();
11019 if (!N0.hasOneUse() || !N1.hasOneUse())
11020 return SDValue();
11022 SmallVector<SDNode*, 4> SetCCs;
11023 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11024 ISD::ZERO_EXTEND, SetCCs, TLI))
11025 return SDValue();
11027 // Actually do the transformation.
11028 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11029 Load->getChain(), Load->getBasePtr(),
11030 Load->getMemoryVT(), Load->getMemOperand());
11032 SDLoc DL1(N1);
11033 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11034 N1.getOperand(1));
11036 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11037 SDLoc DL0(N0);
11038 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11039 DAG.getConstant(Mask, DL0, VT));
11041 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11042 CombineTo(N, And);
11043 if (SDValue(Load, 0).hasOneUse()) {
11044 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11045 } else {
11046 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11047 Load->getValueType(0), ExtLoad);
11048 CombineTo(Load, Trunc, ExtLoad.getValue(1));
11051 // N0 is dead at this point.
11052 recursivelyDeleteUnusedNodes(N0.getNode());
11054 return SDValue(N,0); // Return N so it doesn't get rechecked!
11057 /// If we're narrowing or widening the result of a vector select and the final
11058 /// size is the same size as a setcc (compare) feeding the select, then try to
11059 /// apply the cast operation to the select's operands because matching vector
11060 /// sizes for a select condition and other operands should be more efficient.
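/// For example (illustrative), where the compare operands are v4i32:
///   (v4i32 (truncate (vselect (setcc a, b), x:v4i64, y:v4i64)))
/// can become
///   (vselect (setcc a, b), (v4i32 (truncate x)), (v4i32 (truncate y)))
/// so the select condition and its other operands have matching vector sizes.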
11061 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11062 unsigned CastOpcode = Cast->getOpcode();
11063 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11064 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11065 CastOpcode == ISD::FP_ROUND) &&
11066 "Unexpected opcode for vector select narrowing/widening");
11068 // We only do this transform before legal ops because the pattern may be
11069 // obfuscated by target-specific operations after legalization. Do not create
11070 // an illegal select op, however, because that may be difficult to lower.
11071 EVT VT = Cast->getValueType(0);
11072 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11073 return SDValue();
11075 SDValue VSel = Cast->getOperand(0);
11076 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11077 VSel.getOperand(0).getOpcode() != ISD::SETCC)
11078 return SDValue();
11080 // Does the setcc have the same vector size as the casted select?
11081 SDValue SetCC = VSel.getOperand(0);
11082 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11083 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11084 return SDValue();
11086 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11087 SDValue A = VSel.getOperand(1);
11088 SDValue B = VSel.getOperand(2);
11089 SDValue CastA, CastB;
11090 SDLoc DL(Cast);
11091 if (CastOpcode == ISD::FP_ROUND) {
11092 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11093 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11094 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11095 } else {
11096 CastA = DAG.getNode(CastOpcode, DL, VT, A);
11097 CastB = DAG.getNode(CastOpcode, DL, VT, B);
11099 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11102 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11103 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11104 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11105 const TargetLowering &TLI, EVT VT,
11106 bool LegalOperations, SDNode *N,
11107 SDValue N0, ISD::LoadExtType ExtLoadType) {
11108 SDNode *N0Node = N0.getNode();
11109 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11110 : ISD::isZEXTLoad(N0Node);
11111 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11112 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11113 return SDValue();
11115 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11116 EVT MemVT = LN0->getMemoryVT();
11117 if ((LegalOperations || !LN0->isSimple() ||
11118 VT.isVector()) &&
11119 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11120 return SDValue();
11122 SDValue ExtLoad =
11123 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11124 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11125 Combiner.CombineTo(N, ExtLoad);
11126 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11127 if (LN0->use_empty())
11128 Combiner.recursivelyDeleteUnusedNodes(LN0);
11129 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11132 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11133 // Only generate vector extloads when 1) they're legal, and 2) they are
11134 // deemed desirable by the target.
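// For example (illustrative), on a target where an i8->i32 sextload is legal:
//   (i32 (sext (i8 (load x)))) --> (i32 (sextload x))
// and any remaining uses of the original i8 load are rewritten as
//   (i8 (truncate (i32 (sextload x)))).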
11135 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11136 const TargetLowering &TLI, EVT VT,
11137 bool LegalOperations, SDNode *N, SDValue N0,
11138 ISD::LoadExtType ExtLoadType,
11139 ISD::NodeType ExtOpc) {
11140 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11141 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11142 ((LegalOperations || VT.isVector() ||
11143 !cast<LoadSDNode>(N0)->isSimple()) &&
11144 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11145 return {};
11147 bool DoXform = true;
11148 SmallVector<SDNode *, 4> SetCCs;
11149 if (!N0.hasOneUse())
11150 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11151 if (VT.isVector())
11152 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11153 if (!DoXform)
11154 return {};
11156 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11157 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11158 LN0->getBasePtr(), N0.getValueType(),
11159 LN0->getMemOperand());
11160 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11161 // If the load value is used only by N, replace it via CombineTo N.
11162 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11163 Combiner.CombineTo(N, ExtLoad);
11164 if (NoReplaceTrunc) {
11165 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11166 Combiner.recursivelyDeleteUnusedNodes(LN0);
11167 } else {
11168 SDValue Trunc =
11169 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11170 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11172 return SDValue(N, 0); // Return N so it doesn't get rechecked!
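/// Fold ([s|z]ext (masked_load x)) into an extending masked load when the
/// target considers that legal and desirable, extending the pass-through
/// value to the result type as well.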
11175 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11176 const TargetLowering &TLI, EVT VT,
11177 SDNode *N, SDValue N0,
11178 ISD::LoadExtType ExtLoadType,
11179 ISD::NodeType ExtOpc) {
11180 if (!N0.hasOneUse())
11181 return SDValue();
11183 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11184 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11185 return SDValue();
11187 if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11188 return SDValue();
11190 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11191 return SDValue();
11193 SDLoc dl(Ld);
11194 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11195 SDValue NewLoad = DAG.getMaskedLoad(
11196 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11197 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11198 ExtLoadType, Ld->isExpandingLoad());
11199 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11200 return NewLoad;
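/// If the extended operand is a one-use sign-bit test (setcc X, -1, setgt),
/// replace the sext/zext(setcc) with an invert-and-shift of X, e.g.
///   sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
/// when the types match and the target does not object to the shift.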
11203 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11204 bool LegalOperations) {
11205 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11206 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11208 SDValue SetCC = N->getOperand(0);
11209 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11210 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11211 return SDValue();
11213 SDValue X = SetCC.getOperand(0);
11214 SDValue Ones = SetCC.getOperand(1);
11215 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11216 EVT VT = N->getValueType(0);
11217 EVT XVT = X.getValueType();
11218 // setge X, C is canonicalized to setgt, so we do not need to match that
11219 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11220 // not require the 'not' op.
11221 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11222 // Invert and smear/shift the sign bit:
11223 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11224 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
11225 SDLoc DL(N);
11226 unsigned ShCt = VT.getSizeInBits() - 1;
11227 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11228 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11229 SDValue NotX = DAG.getNOT(DL, X, VT);
11230 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11231 auto ShiftOpcode =
11232 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11233 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11236 return SDValue();
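/// Combine (sext (setcc ...)). Depending on legality this either produces a
/// vector setcc of the final type directly, widens the compare operands so a
/// wider setcc can be used, or lowers to a select between the extended "true"
/// value and zero.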
11239 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11240 SDValue N0 = N->getOperand(0);
11241 if (N0.getOpcode() != ISD::SETCC)
11242 return SDValue();
11244 SDValue N00 = N0.getOperand(0);
11245 SDValue N01 = N0.getOperand(1);
11246 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11247 EVT VT = N->getValueType(0);
11248 EVT N00VT = N00.getValueType();
11249 SDLoc DL(N);
11251 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11252 // the same size as the compared operands. Try to optimize sext(setcc())
11253 // if this is the case.
11254 if (VT.isVector() && !LegalOperations &&
11255 TLI.getBooleanContents(N00VT) ==
11256 TargetLowering::ZeroOrNegativeOneBooleanContent) {
11257 EVT SVT = getSetCCResultType(N00VT);
11259 // If we already have the desired type, don't change it.
11260 if (SVT != N0.getValueType()) {
11261 // We know that the # elements of the results is the same as the
11262 // # elements of the compare (and the # elements of the compare result
11263 // for that matter). Check to see that they are the same size. If so,
11264 // we know that the element size of the sext'd result matches the
11265 // element size of the compare operands.
11266 if (VT.getSizeInBits() == SVT.getSizeInBits())
11267 return DAG.getSetCC(DL, VT, N00, N01, CC);
11269 // If the desired elements are smaller or larger than the source
11270 // elements, we can use a matching integer vector type and then
11271 // truncate/sign extend.
11272 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11273 if (SVT == MatchingVecType) {
11274 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11275 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11279 // Try to eliminate the sext of a setcc by zexting the compare operands.
11280 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11281 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
11282 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11283 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11284 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11286 // We have an unsupported narrow vector compare op that would be legal
11287 // if extended to the destination type. See if the compare operands
11288 // can be freely extended to the destination type.
11289 auto IsFreeToExtend = [&](SDValue V) {
11290 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11291 return true;
11292 // Match a simple, non-extended load that can be converted to a
11293 // legal {z/s}ext-load.
11294 // TODO: Allow widening of an existing {z/s}ext-load?
11295 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11296 ISD::isUNINDEXEDLoad(V.getNode()) &&
11297 cast<LoadSDNode>(V)->isSimple() &&
11298 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11299 return false;
11301 // Non-chain users of this value must either be the setcc in this
11302 // sequence or extends that can be folded into the new {z/s}ext-load.
11303 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11304 UI != UE; ++UI) {
11305 // Skip uses of the chain and the setcc.
11306 SDNode *User = *UI;
11307 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11308 continue;
11309 // Extra users must have exactly the same cast we are about to create.
11310 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11311 // is enhanced similarly.
11312 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11313 return false;
11315 return true;
11318 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11319 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11320 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11321 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11326 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11327 // Here, T can be 1 or -1, depending on the type of the setcc and
11328 // getBooleanContents().
11329 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11331 // To determine the "true" side of the select, we need to know the high bit
11332 // of the value returned by the setcc if it evaluates to true.
11333 // If the type of the setcc is i1, then the true case of the select is just
11334 // sext(i1 1), that is, -1.
11335 // If the type of the setcc is larger (say, i8) then the value of the high
11336 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11337 // of the appropriate width.
11338 SDValue ExtTrueVal = (SetCCWidth == 1)
11339 ? DAG.getAllOnesConstant(DL, VT)
11340 : DAG.getBoolConstant(true, DL, VT, N00VT);
11341 SDValue Zero = DAG.getConstant(0, DL, VT);
11342 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11343 return SCC;
11345 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11346 EVT SetCCVT = getSetCCResultType(N00VT);
11347 // Don't do this transform for i1 because there's a select transform
11348 // that would reverse it.
11349 // TODO: We should not do this transform at all without a target hook
11350 // because a sext is likely cheaper than a select?
11351 if (SetCCVT.getScalarSizeInBits() != 1 &&
11352 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11353 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11354 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11358 return SDValue();
11361 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11362 SDValue N0 = N->getOperand(0);
11363 EVT VT = N->getValueType(0);
11364 SDLoc DL(N);
11366 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11367 return Res;
11369 // fold (sext (sext x)) -> (sext x)
11370 // fold (sext (aext x)) -> (sext x)
11371 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11372 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11374 if (N0.getOpcode() == ISD::TRUNCATE) {
11375 // fold (sext (truncate (load x))) -> (sext (smaller load x))
11376 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11377 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11378 SDNode *oye = N0.getOperand(0).getNode();
11379 if (NarrowLoad.getNode() != N0.getNode()) {
11380 CombineTo(N0.getNode(), NarrowLoad);
11381 // CombineTo deleted the truncate, if needed, but not what's under it.
11382 AddToWorklist(oye);
11384 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11387 // See if the value being truncated is already sign extended. If so, just
11388 // eliminate the trunc/sext pair.
11389 SDValue Op = N0.getOperand(0);
11390 unsigned OpBits = Op.getScalarValueSizeInBits();
11391 unsigned MidBits = N0.getScalarValueSizeInBits();
11392 unsigned DestBits = VT.getScalarSizeInBits();
11393 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11395 if (OpBits == DestBits) {
11396 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
11397       // bits, it is already sign-extended enough and can be used directly.
11398 if (NumSignBits > DestBits-MidBits)
11399 return Op;
11400 } else if (OpBits < DestBits) {
11401 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
11402 // bits, just sext from i32.
11403 if (NumSignBits > OpBits-MidBits)
11404 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11405 } else {
11406 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
11407 // bits, just truncate to i32.
11408 if (NumSignBits > OpBits-MidBits)
11409 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11412 // fold (sext (truncate x)) -> (sextinreg x).
11413 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11414 N0.getValueType())) {
11415 if (OpBits < DestBits)
11416 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11417 else if (OpBits > DestBits)
11418 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11419 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11420 DAG.getValueType(N0.getValueType()));
11424 // Try to simplify (sext (load x)).
11425 if (SDValue foldedExt =
11426 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11427 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11428 return foldedExt;
11430 if (SDValue foldedExt =
11431 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11432 ISD::SIGN_EXTEND))
11433 return foldedExt;
11435 // fold (sext (load x)) to multiple smaller sextloads.
11436 // Only on illegal but splittable vectors.
11437 if (SDValue ExtLoad = CombineExtLoad(N))
11438 return ExtLoad;
11440 // Try to simplify (sext (sextload x)).
11441 if (SDValue foldedExt = tryToFoldExtOfExtload(
11442 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11443 return foldedExt;
11445 // fold (sext (and/or/xor (load x), cst)) ->
11446 // (and/or/xor (sextload x), (sext cst))
11447 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11448 N0.getOpcode() == ISD::XOR) &&
11449 isa<LoadSDNode>(N0.getOperand(0)) &&
11450 N0.getOperand(1).getOpcode() == ISD::Constant &&
11451 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11452 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11453 EVT MemVT = LN00->getMemoryVT();
11454 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11455 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11456 SmallVector<SDNode*, 4> SetCCs;
11457 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11458 ISD::SIGN_EXTEND, SetCCs, TLI);
11459 if (DoXform) {
11460 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11461 LN00->getChain(), LN00->getBasePtr(),
11462 LN00->getMemoryVT(),
11463 LN00->getMemOperand());
11464 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11465 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11466 ExtLoad, DAG.getConstant(Mask, DL, VT));
11467 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11468 bool NoReplaceTruncAnd = !N0.hasOneUse();
11469 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11470 CombineTo(N, And);
11471 // If N0 has multiple uses, change other uses as well.
11472 if (NoReplaceTruncAnd) {
11473 SDValue TruncAnd =
11474 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11475 CombineTo(N0.getNode(), TruncAnd);
11477 if (NoReplaceTrunc) {
11478 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11479 } else {
11480 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11481 LN00->getValueType(0), ExtLoad);
11482 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11484 return SDValue(N,0); // Return N so it doesn't get rechecked!
11489 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11490 return V;
11492 if (SDValue V = foldSextSetcc(N))
11493 return V;
11495 // fold (sext x) -> (zext x) if the sign bit is known zero.
11496 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11497 DAG.SignBitIsZero(N0))
11498 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11500 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11501 return NewVSel;
11503 // Eliminate this sign extend by doing a negation in the destination type:
11504 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11505 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11506 isNullOrNullSplat(N0.getOperand(0)) &&
11507 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11508 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11509 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11510 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11512 // Eliminate this sign extend by doing a decrement in the destination type:
11513 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11514 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11515 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11516 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11517 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11518 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11519 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11522 // fold sext (not i1 X) -> add (zext i1 X), -1
11523 // TODO: This could be extended to handle bool vectors.
11524 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11525 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11526 TLI.isOperationLegal(ISD::ADD, VT)))) {
11527 // If we can eliminate the 'not', the sext form should be better
11528 if (SDValue NewXor = visitXOR(N0.getNode())) {
11529 // Returning N0 is a form of in-visit replacement that may have
11530 // invalidated N0.
11531 if (NewXor.getNode() == N0.getNode()) {
11532 // Return SDValue here as the xor should have already been replaced in
11533 // this sext.
11534 return SDValue();
11535 } else {
11536 // Return a new sext with the new xor.
11537 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11541 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11542 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11545 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11546 return Res;
11548 return SDValue();
11551 // isTruncateOf - If N is a truncate of some other value, return true and record
11552 // the value being truncated in Op and which of Op's bits are zero/one in Known.
11553 // This function computes KnownBits to avoid a duplicated call to
11554 // computeKnownBits in the caller.
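// Note that (setcc X, 0, setne) with an i1 result is also treated as a
// truncate of X to i1 when every bit of X other than bit 0 is known zero.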
11555 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11556 KnownBits &Known) {
11557 if (N->getOpcode() == ISD::TRUNCATE) {
11558 Op = N->getOperand(0);
11559 Known = DAG.computeKnownBits(Op);
11560 return true;
11563 if (N.getOpcode() != ISD::SETCC ||
11564 N.getValueType().getScalarType() != MVT::i1 ||
11565 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11566 return false;
11568 SDValue Op0 = N->getOperand(0);
11569 SDValue Op1 = N->getOperand(1);
11570 assert(Op0.getValueType() == Op1.getValueType());
11572 if (isNullOrNullSplat(Op0))
11573 Op = Op1;
11574 else if (isNullOrNullSplat(Op1))
11575 Op = Op0;
11576 else
11577 return false;
11579 Known = DAG.computeKnownBits(Op);
11581 return (Known.Zero | 1).isAllOnes();
11584 /// Given an extending node with a pop-count operand, if the target does not
11585 /// support a pop-count in the narrow source type but does support it in the
11586 /// destination type, widen the pop-count to the destination type.
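// For example (illustrative), if CTPOP is only legal for i32:
//   (i32 (zext (i16 (ctpop X)))) --> (i32 (ctpop (i32 (zext X))))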
11587 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11588 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11589 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11591 SDValue CtPop = Extend->getOperand(0);
11592 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11593 return SDValue();
11595 EVT VT = Extend->getValueType(0);
11596 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11597 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11598 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11599 return SDValue();
11601 // zext (ctpop X) --> ctpop (zext X)
11602 SDLoc DL(Extend);
11603 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11604 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11607 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11608 SDValue N0 = N->getOperand(0);
11609 EVT VT = N->getValueType(0);
11611 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11612 return Res;
11614 // fold (zext (zext x)) -> (zext x)
11615 // fold (zext (aext x)) -> (zext x)
11616 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11617 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11618 N0.getOperand(0));
11620 // fold (zext (truncate x)) -> (zext x) or
11621 // (zext (truncate x)) -> (truncate x)
11622 // This is valid when the truncated bits of x are already zero.
11623 SDValue Op;
11624 KnownBits Known;
11625 if (isTruncateOf(DAG, N0, Op, Known)) {
11626 APInt TruncatedBits =
11627 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11628 APInt(Op.getScalarValueSizeInBits(), 0) :
11629 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11630 N0.getScalarValueSizeInBits(),
11631 std::min(Op.getScalarValueSizeInBits(),
11632 VT.getScalarSizeInBits()));
11633 if (TruncatedBits.isSubsetOf(Known.Zero))
11634 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11637 // fold (zext (truncate x)) -> (and x, mask)
11638 if (N0.getOpcode() == ISD::TRUNCATE) {
11639 // fold (zext (truncate (load x))) -> (zext (smaller load x))
11640 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11641 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11642 SDNode *oye = N0.getOperand(0).getNode();
11643 if (NarrowLoad.getNode() != N0.getNode()) {
11644 CombineTo(N0.getNode(), NarrowLoad);
11645 // CombineTo deleted the truncate, if needed, but not what's under it.
11646 AddToWorklist(oye);
11648 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11651 EVT SrcVT = N0.getOperand(0).getValueType();
11652 EVT MinVT = N0.getValueType();
11654 // Try to mask before the extension to avoid having to generate a larger mask,
11655 // possibly over several sub-vectors.
11656 if (SrcVT.bitsLT(VT) && VT.isVector()) {
11657 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11658 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11659 SDValue Op = N0.getOperand(0);
11660 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11661 AddToWorklist(Op.getNode());
11662 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11663 // Transfer the debug info; the new node is equivalent to N0.
11664 DAG.transferDbgValues(N0, ZExtOrTrunc);
11665 return ZExtOrTrunc;
11669 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11670 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11671 AddToWorklist(Op.getNode());
11672 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11673 // We may safely transfer the debug info describing the truncate node over
11674 // to the equivalent and operation.
11675 DAG.transferDbgValues(N0, And);
11676 return And;
11680 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11681 // if either of the casts is not free.
11682 if (N0.getOpcode() == ISD::AND &&
11683 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11684 N0.getOperand(1).getOpcode() == ISD::Constant &&
11685 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11686 N0.getValueType()) ||
11687 !TLI.isZExtFree(N0.getValueType(), VT))) {
11688 SDValue X = N0.getOperand(0).getOperand(0);
11689 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11690 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11691 SDLoc DL(N);
11692 return DAG.getNode(ISD::AND, DL, VT,
11693 X, DAG.getConstant(Mask, DL, VT));
11696 // Try to simplify (zext (load x)).
11697 if (SDValue foldedExt =
11698 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11699 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11700 return foldedExt;
11702 if (SDValue foldedExt =
11703 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11704 ISD::ZERO_EXTEND))
11705 return foldedExt;
11707 // fold (zext (load x)) to multiple smaller zextloads.
11708 // Only on illegal but splittable vectors.
11709 if (SDValue ExtLoad = CombineExtLoad(N))
11710 return ExtLoad;
11712 // fold (zext (and/or/xor (load x), cst)) ->
11713 // (and/or/xor (zextload x), (zext cst))
11714 // Unless (and (load x) cst) will match as a zextload already and has
11715 // additional users.
11716 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11717 N0.getOpcode() == ISD::XOR) &&
11718 isa<LoadSDNode>(N0.getOperand(0)) &&
11719 N0.getOperand(1).getOpcode() == ISD::Constant &&
11720 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11721 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11722 EVT MemVT = LN00->getMemoryVT();
11723 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11724 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11725 bool DoXform = true;
11726 SmallVector<SDNode*, 4> SetCCs;
11727 if (!N0.hasOneUse()) {
11728 if (N0.getOpcode() == ISD::AND) {
11729 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11730 EVT LoadResultTy = AndC->getValueType(0);
11731 EVT ExtVT;
11732 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11733 DoXform = false;
11736 if (DoXform)
11737 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11738 ISD::ZERO_EXTEND, SetCCs, TLI);
11739 if (DoXform) {
11740 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11741 LN00->getChain(), LN00->getBasePtr(),
11742 LN00->getMemoryVT(),
11743 LN00->getMemOperand());
11744 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11745 SDLoc DL(N);
11746 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11747 ExtLoad, DAG.getConstant(Mask, DL, VT));
11748 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11749 bool NoReplaceTruncAnd = !N0.hasOneUse();
11750 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11751 CombineTo(N, And);
11752 // If N0 has multiple uses, change other uses as well.
11753 if (NoReplaceTruncAnd) {
11754 SDValue TruncAnd =
11755 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11756 CombineTo(N0.getNode(), TruncAnd);
11758 if (NoReplaceTrunc) {
11759 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11760 } else {
11761 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11762 LN00->getValueType(0), ExtLoad);
11763 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11765 return SDValue(N,0); // Return N so it doesn't get rechecked!
11770 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11771 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11772 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11773 return ZExtLoad;
11775 // Try to simplify (zext (zextload x)).
11776 if (SDValue foldedExt = tryToFoldExtOfExtload(
11777 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11778 return foldedExt;
11780 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11781 return V;
11783 if (N0.getOpcode() == ISD::SETCC) {
11784 // Only do this before legalize for now.
11785 if (!LegalOperations && VT.isVector() &&
11786 N0.getValueType().getVectorElementType() == MVT::i1) {
11787 EVT N00VT = N0.getOperand(0).getValueType();
11788 if (getSetCCResultType(N00VT) == N0.getValueType())
11789 return SDValue();
11791 // We know that the # elements of the results is the same as the #
11792 // elements of the compare (and the # elements of the compare result for
11793 // that matter). Check to see that they are the same size. If so, we know
11794 // that the element size of the sext'd result matches the element size of
11795 // the compare operands.
11796 SDLoc DL(N);
11797 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11798 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11799 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11800 N0.getOperand(1), N0.getOperand(2));
11801 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11804 // If the desired elements are smaller or larger than the source
11805 // elements we can use a matching integer vector type and then
11806 // truncate/any extend followed by zext_in_reg.
11807 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11808 SDValue VsetCC =
11809 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11810 N0.getOperand(1), N0.getOperand(2));
11811 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11812 N0.getValueType());
11815     // zext(setcc x,y,cc) -> zext(select_cc x, y, true, false, cc)
11816 SDLoc DL(N);
11817 EVT N0VT = N0.getValueType();
11818 EVT N00VT = N0.getOperand(0).getValueType();
11819 if (SDValue SCC = SimplifySelectCC(
11820 DL, N0.getOperand(0), N0.getOperand(1),
11821 DAG.getBoolConstant(true, DL, N0VT, N00VT),
11822 DAG.getBoolConstant(false, DL, N0VT, N00VT),
11823 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11824 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11827 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11828 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11829 isa<ConstantSDNode>(N0.getOperand(1)) &&
11830 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11831 N0.hasOneUse()) {
11832 SDValue ShAmt = N0.getOperand(1);
11833 if (N0.getOpcode() == ISD::SHL) {
11834 SDValue InnerZExt = N0.getOperand(0);
11835 // If the original shl may be shifting out bits, do not perform this
11836 // transformation.
11837 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11838 InnerZExt.getOperand(0).getValueSizeInBits();
11839 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11840 return SDValue();
11843 SDLoc DL(N);
11845 // Ensure that the shift amount is wide enough for the shifted value.
11846 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11847 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11849 return DAG.getNode(N0.getOpcode(), DL, VT,
11850 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11851 ShAmt);
11854 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11855 return NewVSel;
11857 if (SDValue NewCtPop = widenCtPop(N, DAG))
11858 return NewCtPop;
11860 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11861 return Res;
11863 return SDValue();
11866 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11867 SDValue N0 = N->getOperand(0);
11868 EVT VT = N->getValueType(0);
11870 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11871 return Res;
11873 // fold (aext (aext x)) -> (aext x)
11874 // fold (aext (zext x)) -> (zext x)
11875 // fold (aext (sext x)) -> (sext x)
11876 if (N0.getOpcode() == ISD::ANY_EXTEND ||
11877 N0.getOpcode() == ISD::ZERO_EXTEND ||
11878 N0.getOpcode() == ISD::SIGN_EXTEND)
11879 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11881 // fold (aext (truncate (load x))) -> (aext (smaller load x))
11882 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11883 if (N0.getOpcode() == ISD::TRUNCATE) {
11884 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11885 SDNode *oye = N0.getOperand(0).getNode();
11886 if (NarrowLoad.getNode() != N0.getNode()) {
11887 CombineTo(N0.getNode(), NarrowLoad);
11888 // CombineTo deleted the truncate, if needed, but not what's under it.
11889 AddToWorklist(oye);
11891 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11895 // fold (aext (truncate x))
11896 if (N0.getOpcode() == ISD::TRUNCATE)
11897 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11899 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11900 // if the trunc is not free.
11901 if (N0.getOpcode() == ISD::AND &&
11902 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11903 N0.getOperand(1).getOpcode() == ISD::Constant &&
11904 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11905 N0.getValueType())) {
11906 SDLoc DL(N);
11907 SDValue X = N0.getOperand(0).getOperand(0);
11908 X = DAG.getAnyExtOrTrunc(X, DL, VT);
11909 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11910 return DAG.getNode(ISD::AND, DL, VT,
11911 X, DAG.getConstant(Mask, DL, VT));
11914 // fold (aext (load x)) -> (aext (truncate (extload x)))
11915 // None of the supported targets knows how to perform load and any_ext
11916 // on vectors in one instruction, so attempt to fold to zext instead.
11917 if (VT.isVector()) {
11918 // Try to simplify (zext (load x)).
11919 if (SDValue foldedExt =
11920 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11921 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11922 return foldedExt;
11923 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11924 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11925 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11926 bool DoXform = true;
11927 SmallVector<SDNode *, 4> SetCCs;
11928 if (!N0.hasOneUse())
11929 DoXform =
11930 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11931 if (DoXform) {
11932 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11933 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11934 LN0->getChain(), LN0->getBasePtr(),
11935 N0.getValueType(), LN0->getMemOperand());
11936 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11937 // If the load value is used only by N, replace it via CombineTo N.
11938 bool NoReplaceTrunc = N0.hasOneUse();
11939 CombineTo(N, ExtLoad);
11940 if (NoReplaceTrunc) {
11941 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11942 recursivelyDeleteUnusedNodes(LN0);
11943 } else {
11944 SDValue Trunc =
11945 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11946 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11948 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11952 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11953 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11954 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
11955 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11956 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11957 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11958 ISD::LoadExtType ExtType = LN0->getExtensionType();
11959 EVT MemVT = LN0->getMemoryVT();
11960 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11961 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11962 VT, LN0->getChain(), LN0->getBasePtr(),
11963 MemVT, LN0->getMemOperand());
11964 CombineTo(N, ExtLoad);
11965 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11966 recursivelyDeleteUnusedNodes(LN0);
11967 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11971 if (N0.getOpcode() == ISD::SETCC) {
11972 // For vectors:
11973 // aext(setcc) -> vsetcc
11974 // aext(setcc) -> truncate(vsetcc)
11975 // aext(setcc) -> aext(vsetcc)
11976 // Only do this before legalize for now.
11977 if (VT.isVector() && !LegalOperations) {
11978 EVT N00VT = N0.getOperand(0).getValueType();
11979 if (getSetCCResultType(N00VT) == N0.getValueType())
11980 return SDValue();
11982 // We know that the # elements of the results is the same as the
11983 // # elements of the compare (and the # elements of the compare result
11984 // for that matter). Check to see that they are the same size. If so,
11985 // we know that the element size of the sext'd result matches the
11986 // element size of the compare operands.
11987 if (VT.getSizeInBits() == N00VT.getSizeInBits())
11988 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11989 N0.getOperand(1),
11990 cast<CondCodeSDNode>(N0.getOperand(2))->get());
11992 // If the desired elements are smaller or larger than the source
11993 // elements we can use a matching integer vector type and then
11994 // truncate/any extend
11995 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11996 SDValue VsetCC =
11997 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11998 N0.getOperand(1),
11999 cast<CondCodeSDNode>(N0.getOperand(2))->get());
12000 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12003 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12004 SDLoc DL(N);
12005 if (SDValue SCC = SimplifySelectCC(
12006 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12007 DAG.getConstant(0, DL, VT),
12008 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12009 return SCC;
12012 if (SDValue NewCtPop = widenCtPop(N, DAG))
12013 return NewCtPop;
12015 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12016 return Res;
12018 return SDValue();
12021 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12022 unsigned Opcode = N->getOpcode();
12023 SDValue N0 = N->getOperand(0);
12024 SDValue N1 = N->getOperand(1);
12025 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12027 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12028 if (N0.getOpcode() == Opcode &&
12029 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12030 return N0;
12032 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12033 N0.getOperand(0).getOpcode() == Opcode) {
12034 // We have an assert, truncate, assert sandwich. Make one stronger assert
12035 // by asserting the smallest asserted type on the larger source type.
12036 // This eliminates the later assert:
12037 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12038 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12039 SDValue BigA = N0.getOperand(0);
12040 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12041 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12042 "Asserting zero/sign-extended bits to a type larger than the "
12043 "truncated destination does not provide information");
12045 SDLoc DL(N);
12046 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12047 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12048 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12049 BigA.getOperand(0), MinAssertVTVal);
12050 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12053 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
12054 // than X, just move the AssertZext in front of the truncate and drop the
12055 // AssertSext.
12056 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12057 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12058 Opcode == ISD::AssertZext) {
12059 SDValue BigA = N0.getOperand(0);
12060 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12061 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12062 "Asserting zero/sign-extended bits to a type larger than the "
12063 "truncated destination does not provide information");
12065 if (AssertVT.bitsLT(BigA_AssertVT)) {
12066 SDLoc DL(N);
12067 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12068 BigA.getOperand(0), N1);
12069 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12073 return SDValue();
12076 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12077 SDLoc DL(N);
12079 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12080 SDValue N0 = N->getOperand(0);
12082 // Fold (assertalign (assertalign x, AL0), AL1) ->
12083 // (assertalign x, max(AL0, AL1))
12084 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12085 return DAG.getAssertAlign(DL, N0.getOperand(0),
12086 std::max(AL, AAN->getAlign()));
12088 // In rare cases, there are trivial arithmetic ops in the source operands. Sink
12089 // this assert down to the source operands so that those arithmetic ops can be
12090 // exposed to DAG combining.
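// For example, (assertalign (add x, 16), 16) can be rewritten as
// (add (assertalign x, 16), 16), exposing the add to further combines.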
12091 switch (N0.getOpcode()) {
12092 default:
12093 break;
12094 case ISD::ADD:
12095 case ISD::SUB: {
12096 unsigned AlignShift = Log2(AL);
12097 SDValue LHS = N0.getOperand(0);
12098 SDValue RHS = N0.getOperand(1);
12099 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12100 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12101 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12102 if (LHSAlignShift < AlignShift)
12103 LHS = DAG.getAssertAlign(DL, LHS, AL);
12104 if (RHSAlignShift < AlignShift)
12105 RHS = DAG.getAssertAlign(DL, RHS, AL);
12106 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12108 break;
12112 return SDValue();
12115 /// If the result of a load is shifted/masked/truncated to an effectively
12116 /// narrower type, try to transform the load to a narrower type and/or
12117 /// use an extending load.
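/// For example, (i32 (and (load p), 255)) can usually become
/// (i32 (zextload i8 from p)), and (i16 (trunc (i32 (load p)))) can become a
/// plain i16 load, subject to the legality checks below.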
12118 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12119 unsigned Opc = N->getOpcode();
12121 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12122 SDValue N0 = N->getOperand(0);
12123 EVT VT = N->getValueType(0);
12124 EVT ExtVT = VT;
12126 // This transformation isn't valid for vector loads.
12127 if (VT.isVector())
12128 return SDValue();
12130 // The ShAmt variable is used to indicate that we've consumed a right
12131 // shift, i.e. we want to narrow the width of the load by skipping the ShAmt
12132 // least significant bits.
12133 unsigned ShAmt = 0;
12134 // A special case is when the least significant bits from the load are masked
12135 // away, but using an AND rather than a right shift. HasShiftedOffset is used
12136 // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12137 // the result.
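// For example, on a little-endian target (and (i32 (load p)), 0xFF00) can
// become (shl (i32 (zextload i8 from p+1)), 8), where ShAmt is 8.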
12138 bool HasShiftedOffset = false;
12139 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
12140 // sign-extending back to VT.
12141 if (Opc == ISD::SIGN_EXTEND_INREG) {
12142 ExtType = ISD::SEXTLOAD;
12143 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12144 } else if (Opc == ISD::SRL) {
12145 // Another special-case: SRL is basically zero-extending a narrower value,
12146 // or it may be shifting a higher subword, half or byte into the lowest
12147 // bits.
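// For example, on a little-endian target (srl (i32 (load p)), 16) can be
// narrowed to (i32 (zextload i16 from p+2)), assuming the extending load is
// legal for the target.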
12149 // Only handle shift with constant shift amount, and the shiftee must be a
12150 // load.
12151 auto *LN = dyn_cast<LoadSDNode>(N0);
12152 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12153 if (!N1C || !LN)
12154 return SDValue();
12155 // If the shift amount is larger than the memory type then we're not
12156 // accessing any of the loaded bytes.
12157 ShAmt = N1C->getZExtValue();
12158 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12159 if (MemoryWidth <= ShAmt)
12160 return SDValue();
12161 // Attempt to fold away the SRL by using ZEXTLOAD.
12162 ExtType = ISD::ZEXTLOAD;
12163 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12164 // If the original load is a SEXTLOAD then we can't simply replace it with a
12165 // ZEXTLOAD (we could potentially replace it with a narrower SEXTLOAD
12166 // followed by a ZEXT, but that is not handled at the moment).
12167 if (LN->getExtensionType() == ISD::SEXTLOAD)
12168 return SDValue();
12169 } else if (Opc == ISD::AND) {
12170 // An AND with a constant mask is the same as a truncate + zero-extend.
12171 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12172 if (!AndC)
12173 return SDValue();
12175 const APInt &Mask = AndC->getAPIntValue();
12176 unsigned ActiveBits = 0;
12177 if (Mask.isMask()) {
12178 ActiveBits = Mask.countTrailingOnes();
12179 } else if (Mask.isShiftedMask()) {
12180 ShAmt = Mask.countTrailingZeros();
12181 APInt ShiftedMask = Mask.lshr(ShAmt);
12182 ActiveBits = ShiftedMask.countTrailingOnes();
12183 HasShiftedOffset = true;
12184 } else
12185 return SDValue();
12187 ExtType = ISD::ZEXTLOAD;
12188 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12191 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12192 // a right shift. Here we redo some of those checks, to possibly adjust the
12193 // ExtVT even further based on "a masking AND". We could also end up here for
12194 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12195 // need to be done here as well.
12196 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12197 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
12198 // Bail out when the SRL has more than one use. This is done for historical
12199 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
12200 // check below? And maybe it could be unprofitable to do the transform when
12201 // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
12202 // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
12203 if (!SRL.hasOneUse())
12204 return SDValue();
12206 // Only handle shift with constant shift amount, and the shiftee must be a
12207 // load.
12208 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12209 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12210 if (!SRL1C || !LN)
12211 return SDValue();
12213 // If the shift amount is larger than the input type then we're not
12214 // accessing any of the loaded bytes. If the load was a zextload/extload
12215 // then the result of the shift+trunc is zero/undef (handled elsewhere).
12216 ShAmt = SRL1C->getZExtValue();
12217 if (ShAmt >= LN->getMemoryVT().getSizeInBits())
12218 return SDValue();
12220 // Because a SRL must be assumed to *need* to zero-extend the high bits
12221 // (as opposed to anyext the high bits), we can't combine the zextload
12222 // lowering of SRL and an sextload.
12223 if (LN->getExtensionType() == ISD::SEXTLOAD)
12224 return SDValue();
12226 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12227 // Is the shift amount a multiple of size of ExtVT?
12228 if ((ShAmt & (ExtVTBits - 1)) != 0)
12229 return SDValue();
12230 // Is the load width a multiple of size of ExtVT?
12231 if ((SRL.getScalarValueSizeInBits() & (ExtVTBits - 1)) != 0)
12232 return SDValue();
12234 // If the SRL is only used by a masking AND, we may be able to adjust
12235 // the ExtVT to make the AND redundant.
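// For example, for (and (srl (i32 (load p)), 16), 255) an i8 zextload is
// sufficient, even though the SRL alone only implies an i16 load.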
12236 SDNode *Mask = *(SRL->use_begin());
12237 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12238 isa<ConstantSDNode>(Mask->getOperand(1))) {
12239 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12240 if (ShiftMask.isMask()) {
12241 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12242 ShiftMask.countTrailingOnes());
12243 // If the mask is smaller, recompute the type.
12244 if ((ExtVTBits > MaskedVT.getScalarSizeInBits()) &&
12245 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12246 ExtVT = MaskedVT;
12250 N0 = SRL.getOperand(0);
12253 // If the load is shifted left (and the result isn't shifted back right), we
12254 // can fold a truncate through the shift. The typical scenario is that N
12255 // points at a TRUNCATE here so the attempted fold is:
12256 // (truncate (shl (load x), c)) -> (shl (narrow load x), c)
12257 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12258 unsigned ShLeftAmt = 0;
12259 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12260 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12261 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12262 ShLeftAmt = N01->getZExtValue();
12263 N0 = N0.getOperand(0);
12267 // If we haven't found a load, we can't narrow it.
12268 if (!isa<LoadSDNode>(N0))
12269 return SDValue();
12271 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12272 // Reducing the width of a volatile load is illegal. For atomics, we may be
12273 // able to reduce the width provided we never widen again. (see D66309)
12274 if (!LN0->isSimple() ||
12275 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12276 return SDValue();
12278 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12279 unsigned LVTStoreBits =
12280 LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12281 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12282 return LVTStoreBits - EVTStoreBits - ShAmt;
12285 // We need to adjust the pointer to the load by ShAmt bits in order to load
12286 // the correct bytes.
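// For example, narrowing a 4-byte load to the i8 at bit offset 8 needs a byte
// offset of 8/8 == 1 on little-endian targets, but (32 - 8 - 8) / 8 == 2 on
// big-endian targets.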
12287 unsigned PtrAdjustmentInBits =
12288 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12290 uint64_t PtrOff = PtrAdjustmentInBits / 8;
12291 Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12292 SDLoc DL(LN0);
12293 // The original load itself didn't wrap, so an offset within it doesn't.
12294 SDNodeFlags Flags;
12295 Flags.setNoUnsignedWrap(true);
12296 SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12297 TypeSize::Fixed(PtrOff), DL, Flags);
12298 AddToWorklist(NewPtr.getNode());
12300 SDValue Load;
12301 if (ExtType == ISD::NON_EXTLOAD)
12302 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12303 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12304 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12305 else
12306 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12307 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12308 NewAlign, LN0->getMemOperand()->getFlags(),
12309 LN0->getAAInfo());
12311 // Replace the old load's chain with the new load's chain.
12312 WorklistRemover DeadNodes(*this);
12313 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12315 // Shift the result left, if we've swallowed a left shift.
12316 SDValue Result = Load;
12317 if (ShLeftAmt != 0) {
12318 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12319 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12320 ShImmTy = VT;
12321 // If the shift amount is as large as the result size (but, presumably,
12322 // no larger than the source) then the useful bits of the result are
12323 // zero; we can't simply return the shortened shift, because the result
12324 // of that operation is undefined.
12325 if (ShLeftAmt >= VT.getScalarSizeInBits())
12326 Result = DAG.getConstant(0, DL, VT);
12327 else
12328 Result = DAG.getNode(ISD::SHL, DL, VT,
12329 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12332 if (HasShiftedOffset) {
12333 // We're using a shifted mask, so the load now has an offset. This means
12334 // that the data has been loaded into lower bytes than it would have been
12335 // before, so we need to shl the loaded data into the correct position in the
12336 // register.
12337 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12338 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12339 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12342 // Return the new loaded value.
12343 return Result;
12346 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12347 SDValue N0 = N->getOperand(0);
12348 SDValue N1 = N->getOperand(1);
12349 EVT VT = N->getValueType(0);
12350 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12351 unsigned VTBits = VT.getScalarSizeInBits();
12352 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12354 // sext_in_reg(undef) = 0 because the top bits will all be the same.
12355 if (N0.isUndef())
12356 return DAG.getConstant(0, SDLoc(N), VT);
12358 // fold (sext_in_reg c1) -> c1
12359 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12360 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12362 // If the input is already sign extended, just drop the extension.
12363 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12364 return N0;
12366 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12367 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12368 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12369 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12370 N1);
12372 // fold (sext_in_reg (sext x)) -> (sext x)
12373 // fold (sext_in_reg (aext x)) -> (sext x)
12374 // if x is small enough or if we know that x has more than 1 sign bit and the
12375 // sign_extend_inreg is extending from one of them.
12376 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12377 SDValue N00 = N0.getOperand(0);
12378 unsigned N00Bits = N00.getScalarValueSizeInBits();
12379 if ((N00Bits <= ExtVTBits ||
12380 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12381 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12382 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12385 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12386 // if x is small enough or if we know that x has more than 1 sign bit and the
12387 // sign_extend_inreg is extending from one of them.
12388 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12389 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12390 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12391 SDValue N00 = N0.getOperand(0);
12392 unsigned N00Bits = N00.getScalarValueSizeInBits();
12393 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12394 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12395 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12396 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12397 if ((N00Bits == ExtVTBits ||
12398 (!IsZext && (N00Bits < ExtVTBits ||
12399 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12400 (!LegalOperations ||
12401 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12402 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12405 // fold (sext_in_reg (zext x)) -> (sext x)
12406 // iff we are extending the source sign bit.
12407 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12408 SDValue N00 = N0.getOperand(0);
12409 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12410 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12411 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12414 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12415 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12416 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12418 // fold operands of sext_in_reg based on knowledge that the top bits are not
12419 // demanded.
12420 if (SimplifyDemandedBits(SDValue(N, 0)))
12421 return SDValue(N, 0);
12423 // fold (sext_in_reg (load x)) -> (smaller sextload x)
12424 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12425 if (SDValue NarrowLoad = reduceLoadWidth(N))
12426 return NarrowLoad;
12428 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12429 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12430 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12431 if (N0.getOpcode() == ISD::SRL) {
12432 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12433 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12434 // We can turn this into an SRA iff the input to the SRL is already sign
12435 // extended enough.
12436 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12437 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12438 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12439 N0.getOperand(1));
12443 // fold (sext_inreg (extload x)) -> (sextload x)
12444 // If sextload is not supported by the target, we can only do the combine when
12445 // the load has one use. Doing otherwise can block folding the extload with other
12446 // extends that the target does support.
12447 if (ISD::isEXTLoad(N0.getNode()) &&
12448 ISD::isUNINDEXEDLoad(N0.getNode()) &&
12449 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12450 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12451 N0.hasOneUse()) ||
12452 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12453 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12454 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12455 LN0->getChain(),
12456 LN0->getBasePtr(), ExtVT,
12457 LN0->getMemOperand());
12458 CombineTo(N, ExtLoad);
12459 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12460 AddToWorklist(ExtLoad.getNode());
12461 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12464 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12465 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12466 N0.hasOneUse() &&
12467 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12468 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12469 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12470 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12471 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12472 LN0->getChain(),
12473 LN0->getBasePtr(), ExtVT,
12474 LN0->getMemOperand());
12475 CombineTo(N, ExtLoad);
12476 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12477 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12480 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12481 // ignore it if the masked load is already sign extended
12482 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12483 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12484 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12485 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12486 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12487 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12488 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12489 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12490 CombineTo(N, ExtMaskedLoad);
12491 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12492 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12496 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12497 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12498 if (SDValue(GN0, 0).hasOneUse() &&
12499 ExtVT == GN0->getMemoryVT() &&
12500 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12501 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
12502 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
12504 SDValue ExtLoad = DAG.getMaskedGather(
12505 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12506 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12508 CombineTo(N, ExtLoad);
12509 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12510 AddToWorklist(ExtLoad.getNode());
12511 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12515 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12516 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12517 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12518 N0.getOperand(1), false))
12519 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12522 return SDValue();
12525 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12526 SDValue N0 = N->getOperand(0);
12527 EVT VT = N->getValueType(0);
12529 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12530 if (N0.isUndef())
12531 return DAG.getConstant(0, SDLoc(N), VT);
12533 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12534 return Res;
12536 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12537 return SDValue(N, 0);
12539 return SDValue();
12542 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12543 SDValue N0 = N->getOperand(0);
12544 EVT VT = N->getValueType(0);
12545 EVT SrcVT = N0.getValueType();
12546 bool isLE = DAG.getDataLayout().isLittleEndian();
12548 // noop truncate
12549 if (SrcVT == VT)
12550 return N0;
12552 // fold (truncate (truncate x)) -> (truncate x)
12553 if (N0.getOpcode() == ISD::TRUNCATE)
12554 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12556 // fold (truncate c1) -> c1
12557 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12558 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12559 if (C.getNode() != N)
12560 return C;
12563 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12564 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12565 N0.getOpcode() == ISD::SIGN_EXTEND ||
12566 N0.getOpcode() == ISD::ANY_EXTEND) {
12567 // if the source is smaller than the dest, we still need an extend.
12568 if (N0.getOperand(0).getValueType().bitsLT(VT))
12569 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12570 // if the source is larger than the dest, then we just need the truncate.
12571 if (N0.getOperand(0).getValueType().bitsGT(VT))
12572 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12573 // if the source and dest are the same type, we can drop both the extend
12574 // and the truncate.
12575 return N0.getOperand(0);
12578 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12579 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12580 return SDValue();
12582 // Fold extract-and-trunc into a narrow extract. For example:
12583 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12584 // i32 y = TRUNCATE(i64 x)
12585 // -- becomes --
12586 // v16i8 b = BITCAST (v2i64 val)
12587 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12589 // Note: We only run this optimization after type legalization (which often
12590 // creates this pattern) and before operation legalization after which
12591 // we need to be more careful about the vector instructions that we generate.
12592 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12593 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12594 EVT VecTy = N0.getOperand(0).getValueType();
12595 EVT ExTy = N0.getValueType();
12596 EVT TrTy = N->getValueType(0);
12598 auto EltCnt = VecTy.getVectorElementCount();
12599 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12600 auto NewEltCnt = EltCnt * SizeRatio;
12602 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12603 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12605 SDValue EltNo = N0->getOperand(1);
12606 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12607 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12608 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12610 SDLoc DL(N);
12611 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12612 DAG.getBitcast(NVT, N0.getOperand(0)),
12613 DAG.getVectorIdxConstant(Index, DL));
12617 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12618 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12619 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12620 TLI.isTruncateFree(SrcVT, VT)) {
12621 SDLoc SL(N0);
12622 SDValue Cond = N0.getOperand(0);
12623 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12624 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12625 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12629 // trunc (shl x, K) -> shl (trunc x), K, if K < VT.getScalarSizeInBits()
12630 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12631 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12632 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12633 SDValue Amt = N0.getOperand(1);
12634 KnownBits Known = DAG.computeKnownBits(Amt);
12635 unsigned Size = VT.getScalarSizeInBits();
12636 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
12637 SDLoc SL(N);
12638 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12640 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12641 if (AmtVT != Amt.getValueType()) {
12642 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12643 AddToWorklist(Amt.getNode());
12645 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12649 if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12650 return V;
12652 // Attempt to pre-truncate BUILD_VECTOR sources.
12653 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12654 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12655 // Avoid creating illegal types if running after type legalizer.
12656 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12657 SDLoc DL(N);
12658 EVT SVT = VT.getScalarType();
12659 SmallVector<SDValue, 8> TruncOps;
12660 for (const SDValue &Op : N0->op_values()) {
12661 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12662 TruncOps.push_back(TruncOp);
12664 return DAG.getBuildVector(VT, DL, TruncOps);
12667 // Fold a series of buildvector, bitcast, and truncate if possible.
12668 // For example fold
12669 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12670 // (2xi32 (buildvector x, y)).
12671 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12672 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12673 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12674 N0.getOperand(0).hasOneUse()) {
12675 SDValue BuildVect = N0.getOperand(0);
12676 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12677 EVT TruncVecEltTy = VT.getVectorElementType();
12679 // Check that the element types match.
12680 if (BuildVectEltTy == TruncVecEltTy) {
12681 // Now we only need to compute the offset of the truncated elements.
12682 unsigned BuildVecNumElts = BuildVect.getNumOperands();
12683 unsigned TruncVecNumElts = VT.getVectorNumElements();
12684 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12686 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12687 "Invalid number of elements");
12689 SmallVector<SDValue, 8> Opnds;
12690 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12691 Opnds.push_back(BuildVect.getOperand(i));
12693 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12697 // See if we can simplify the input to this truncate through knowledge that
12698 // only the low bits are being used.
12699 // For example "trunc (or (shl x, 8), y)" -> trunc y
12700 // Currently we only perform this optimization on scalars because vectors
12701 // may have different active low bits.
12702 if (!VT.isVector()) {
12703 APInt Mask =
12704 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12705 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12706 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12709 // fold (truncate (load x)) -> (smaller load x)
12710 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12711 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12712 if (SDValue Reduced = reduceLoadWidth(N))
12713 return Reduced;
12715 // Handle the case where the load remains an extending load even
12716 // after truncation.
12717 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12718 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12719 if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12720 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12721 VT, LN0->getChain(), LN0->getBasePtr(),
12722 LN0->getMemoryVT(),
12723 LN0->getMemOperand());
12724 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12725 return NewLoad;
12730 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
12731 // where ... are all 'undef'.
12732 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12733 SmallVector<EVT, 8> VTs;
12734 SDValue V;
12735 unsigned Idx = 0;
12736 unsigned NumDefs = 0;
12738 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12739 SDValue X = N0.getOperand(i);
12740 if (!X.isUndef()) {
12741 V = X;
12742 Idx = i;
12743 NumDefs++;
12745 // Stop if more than one member is non-undef.
12746 if (NumDefs > 1)
12747 break;
12749 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12750 VT.getVectorElementType(),
12751 X.getValueType().getVectorElementCount()));
12754 if (NumDefs == 0)
12755 return DAG.getUNDEF(VT);
12757 if (NumDefs == 1) {
12758 assert(V.getNode() && "The single defined operand is empty!");
12759 SmallVector<SDValue, 8> Opnds;
12760 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12761 if (i != Idx) {
12762 Opnds.push_back(DAG.getUNDEF(VTs[i]));
12763 continue;
12765 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12766 AddToWorklist(NV.getNode());
12767 Opnds.push_back(NV);
12769 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12773 // Fold truncate of a bitcast of a vector to an extract of the low vector
12774 // element.
12776 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12777 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12778 SDValue VecSrc = N0.getOperand(0);
12779 EVT VecSrcVT = VecSrc.getValueType();
12780 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12781 (!LegalOperations ||
12782 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12783 SDLoc SL(N);
12785 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12786 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12787 DAG.getVectorIdxConstant(Idx, SL));
12791 // Simplify the operands using demanded-bits information.
12792 if (SimplifyDemandedBits(SDValue(N, 0)))
12793 return SDValue(N, 0);
12795 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12796 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12797 // When the adde's carry is not used.
12798 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12799 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12800 // We only do for addcarry before legalize operation
12801 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12802 TLI.isOperationLegal(N0.getOpcode(), VT))) {
12803 SDLoc SL(N);
12804 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12805 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12806 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12807 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12810 // fold (truncate (extract_subvector(ext x))) ->
12811 // (extract_subvector x)
12812 // TODO: This can be generalized to cover cases where the truncate and extract
12813 // do not fully cancel each other out.
12814 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12815 SDValue N00 = N0.getOperand(0);
12816 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12817 N00.getOpcode() == ISD::ZERO_EXTEND ||
12818 N00.getOpcode() == ISD::ANY_EXTEND) {
12819 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12820 VT.getVectorElementType())
12821 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12822 N00.getOperand(0), N0.getOperand(1));
12826 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12827 return NewVSel;
12829 // Narrow a suitable binary operation with a non-opaque constant operand by
12830 // moving it ahead of the truncate. This is limited to pre-legalization
12831 // because targets may prefer a wider type during later combines and invert
12832 // this transform.
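// For example, (i8 (trunc (add (i32 x), 261))) can become
// (add (i8 (trunc x)), 5), since 261 truncates to 5 in i8.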
12833 switch (N0.getOpcode()) {
12834 case ISD::ADD:
12835 case ISD::SUB:
12836 case ISD::MUL:
12837 case ISD::AND:
12838 case ISD::OR:
12839 case ISD::XOR:
12840 if (!LegalOperations && N0.hasOneUse() &&
12841 (isConstantOrConstantVector(N0.getOperand(0), true) ||
12842 isConstantOrConstantVector(N0.getOperand(1), true))) {
12843 // TODO: We already restricted this to pre-legalization, but for vectors
12844 // we are extra cautious to not create an unsupported operation.
12845 // Target-specific changes are likely needed to avoid regressions here.
12846 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12847 SDLoc DL(N);
12848 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12849 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12850 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12853 break;
12854 case ISD::USUBSAT:
12855 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
12856 // enough to know that the upper bits are zero, we must also ensure that we
12857 // don't introduce an extra truncate.
12858 if (!LegalOperations && N0.hasOneUse() &&
12859 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12860 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12861 VT.getScalarSizeInBits() &&
12862 hasOperation(N0.getOpcode(), VT)) {
12863 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12864 DAG, SDLoc(N));
12866 break;
12869 return SDValue();
12872 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12873 SDValue Elt = N->getOperand(i);
12874 if (Elt.getOpcode() != ISD::MERGE_VALUES)
12875 return Elt.getNode();
12876 return Elt.getOperand(Elt.getResNo()).getNode();
12879 /// build_pair (load, load) -> load
12880 /// if load locations are consecutive.
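/// For example, a BUILD_PAIR of two adjacent i32 loads can usually be replaced
/// by a single i64 load of the lower address (with the operands swapped first
/// on big-endian targets), provided the wide access is legal and fast.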
12881 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12882 assert(N->getOpcode() == ISD::BUILD_PAIR);
12884 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12885 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12887 // A BUILD_PAIR always has the least significant part in elt 0 and the
12888 // most significant part in elt 1. So when combining into one large load, we
12889 // need to consider the endianness.
12890 if (DAG.getDataLayout().isBigEndian())
12891 std::swap(LD1, LD2);
12893 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12894 !LD1->hasOneUse() || !LD2->hasOneUse() ||
12895 LD1->getAddressSpace() != LD2->getAddressSpace())
12896 return SDValue();
12898 bool LD1Fast = false;
12899 EVT LD1VT = LD1->getValueType(0);
12900 unsigned LD1Bytes = LD1VT.getStoreSize();
12901 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
12902 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
12903 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
12904 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
12905 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12906 LD1->getPointerInfo(), LD1->getAlign());
12908 return SDValue();
12911 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12912 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12913 // and Lo parts; on big-endian machines it doesn't.
12914 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12917 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12918 const TargetLowering &TLI) {
12919 // If this is not a bitcast to an FP type or if the target doesn't have
12920 // IEEE754-compliant FP logic, we're done.
12921 EVT VT = N->getValueType(0);
12922 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12923 return SDValue();
12925 // TODO: Handle cases where the integer constant is a different scalar
12926 // bitwidth to the FP.
12927 SDValue N0 = N->getOperand(0);
12928 EVT SourceVT = N0.getValueType();
12929 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12930 return SDValue();
12932 unsigned FPOpcode;
12933 APInt SignMask;
12934 switch (N0.getOpcode()) {
12935 case ISD::AND:
12936 FPOpcode = ISD::FABS;
12937 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12938 break;
12939 case ISD::XOR:
12940 FPOpcode = ISD::FNEG;
12941 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12942 break;
12943 case ISD::OR:
12944 FPOpcode = ISD::FABS;
12945 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12946 break;
12947 default:
12948 return SDValue();
12951 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12952 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12953 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12954 // fneg (fabs X)
12955 SDValue LogicOp0 = N0.getOperand(0);
12956 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12957 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12958 LogicOp0.getOpcode() == ISD::BITCAST &&
12959 LogicOp0.getOperand(0).getValueType() == VT) {
12960 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12961 NumFPLogicOpsConv++;
12962 if (N0.getOpcode() == ISD::OR)
12963 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12964 return FPOp;
12967 return SDValue();
12970 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12971 SDValue N0 = N->getOperand(0);
12972 EVT VT = N->getValueType(0);
12974 if (N0.isUndef())
12975 return DAG.getUNDEF(VT);
12977 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12978 // Only do this before legalize types, unless both types are integer and the
12979 // scalar type is legal. Only do this before legalize ops, since the target
12980 // may be depending on the bitcast.
12981 // First check to see if this is all constant.
12982 // TODO: Support FP bitcasts after legalize types.
12983 if (VT.isVector() &&
12984 (!LegalTypes ||
12985 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12986 TLI.isTypeLegal(VT.getVectorElementType()))) &&
12987 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12988 cast<BuildVectorSDNode>(N0)->isConstant())
12989 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12990 VT.getVectorElementType());
12992 // If the input is a constant, let getNode fold it.
12993 if (isIntOrFPConstant(N0)) {
12994 // If we can't allow illegal operations, we need to check that this is just
12995 // a fp -> int or int -> fp conversion and that the resulting operation will
12996 // be legal.
12997 if (!LegalOperations ||
12998 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12999 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13000 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13001 TLI.isOperationLegal(ISD::Constant, VT))) {
13002 SDValue C = DAG.getBitcast(VT, N0);
13003 if (C.getNode() != N)
13004 return C;
13008 // (conv (conv x, t1), t2) -> (conv x, t2)
13009 if (N0.getOpcode() == ISD::BITCAST)
13010 return DAG.getBitcast(VT, N0.getOperand(0));
13012 // fold (conv (load x)) -> (load (conv*)x)
13013 // If the resultant load doesn't need a higher alignment than the original!
13014 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13015 // Do not remove the cast if the types differ in endian layout.
13016 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13017 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13018 // If the load is volatile, we only want to change the load type if the
13019 // resulting load is legal. Otherwise we might increase the number of
13020 // memory accesses. We don't care if the original type was legal or not
13021 // as we assume software couldn't rely on the number of accesses of an
13022 // illegal type.
13023 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13024 TLI.isOperationLegal(ISD::LOAD, VT))) {
13025 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13027 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13028 *LN0->getMemOperand())) {
13029 SDValue Load =
13030 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13031 LN0->getPointerInfo(), LN0->getAlign(),
13032 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13033 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13034 return Load;
13038 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13039 return V;
13041 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13042 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13044 // For ppc_fp128:
13045 // fold (bitcast (fneg x)) ->
13046 // flipbit = signbit
13047 // (xor (bitcast x) (build_pair flipbit, flipbit))
13049 // fold (bitcast (fabs x)) ->
13050 // flipbit = (and (extract_element (bitcast x), 0), signbit)
13051 // (xor (bitcast x) (build_pair flipbit, flipbit))
13052 // This often reduces constant pool loads.
13053 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13054 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13055 N0.getNode()->hasOneUse() && VT.isInteger() &&
13056 !VT.isVector() && !N0.getValueType().isVector()) {
13057 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13058 AddToWorklist(NewConv.getNode());
13060 SDLoc DL(N);
13061 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13062 assert(VT.getSizeInBits() == 128);
13063 SDValue SignBit = DAG.getConstant(
13064 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13065 SDValue FlipBit;
13066 if (N0.getOpcode() == ISD::FNEG) {
13067 FlipBit = SignBit;
13068 AddToWorklist(FlipBit.getNode());
13069 } else {
13070 assert(N0.getOpcode() == ISD::FABS);
13071 SDValue Hi =
13072 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13073 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13074 SDLoc(NewConv)));
13075 AddToWorklist(Hi.getNode());
13076 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13077 AddToWorklist(FlipBit.getNode());
13079 SDValue FlipBits =
13080 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13081 AddToWorklist(FlipBits.getNode());
13082 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13084 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13085 if (N0.getOpcode() == ISD::FNEG)
13086 return DAG.getNode(ISD::XOR, DL, VT,
13087 NewConv, DAG.getConstant(SignBit, DL, VT));
13088 assert(N0.getOpcode() == ISD::FABS);
13089 return DAG.getNode(ISD::AND, DL, VT,
13090 NewConv, DAG.getConstant(~SignBit, DL, VT));
13093 // fold (bitconvert (fcopysign cst, x)) ->
13094 // (or (and (bitconvert x), sign), (and cst, (not sign)))
13095 // Note that we don't handle (copysign x, cst) because this can always be
13096 // folded to an fneg or fabs.
13098 // For ppc_fp128:
13099 // fold (bitcast (fcopysign cst, x)) ->
13100 // flipbit = (and (extract_element
13101 // (xor (bitcast cst), (bitcast x)), 0),
13102 // signbit)
13103 // (xor (bitcast cst) (build_pair flipbit, flipbit))
13104 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
13105 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
13106 VT.isInteger() && !VT.isVector()) {
13107 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13108 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13109 if (isTypeLegal(IntXVT)) {
13110 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13111 AddToWorklist(X.getNode());
13113 // If X has a different width than the result/lhs, sext it or truncate it.
13114 unsigned VTWidth = VT.getSizeInBits();
13115 if (OrigXWidth < VTWidth) {
13116 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13117 AddToWorklist(X.getNode());
13118 } else if (OrigXWidth > VTWidth) {
13119 // To get the sign bit in the right place, we have to shift it right
13120 // before truncating.
13121 SDLoc DL(X);
13122 X = DAG.getNode(ISD::SRL, DL,
13123 X.getValueType(), X,
13124 DAG.getConstant(OrigXWidth-VTWidth, DL,
13125 X.getValueType()));
13126 AddToWorklist(X.getNode());
13127 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13128 AddToWorklist(X.getNode());
13131 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13132 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13133 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13134 AddToWorklist(Cst.getNode());
13135 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13136 AddToWorklist(X.getNode());
13137 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13138 AddToWorklist(XorResult.getNode());
13139 SDValue XorResult64 = DAG.getNode(
13140 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13141 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13142 SDLoc(XorResult)));
13143 AddToWorklist(XorResult64.getNode());
13144 SDValue FlipBit =
13145 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13146 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13147 AddToWorklist(FlipBit.getNode());
13148 SDValue FlipBits =
13149 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13150 AddToWorklist(FlipBits.getNode());
13151 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13153 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13154 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13155 X, DAG.getConstant(SignBit, SDLoc(X), VT));
13156 AddToWorklist(X.getNode());
13158 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13159 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13160 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13161 AddToWorklist(Cst.getNode());
13163 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13167 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13168 if (N0.getOpcode() == ISD::BUILD_PAIR)
13169 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13170 return CombineLD;
13172 // Remove double bitcasts from shuffles - this is often a legacy of
13173 // XformToShuffleWithZero being used to combine bitmaskings (of
13174 // float vectors bitcast to integer vectors) into shuffles.
13175 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13176 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13177 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13178 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13179 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13180 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13182 // If an operand is a bitcast, peek through it if it casts from the original VT.
13183 // If an operand is a constant, just bitcast it back to the original VT.
13184 auto PeekThroughBitcast = [&](SDValue Op) {
13185 if (Op.getOpcode() == ISD::BITCAST &&
13186 Op.getOperand(0).getValueType() == VT)
13187 return SDValue(Op.getOperand(0));
13188 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13189 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
13190 return DAG.getBitcast(VT, Op);
13191 return SDValue();
13194 // FIXME: If either input vector is bitcast, try to convert the shuffle to
13195 // the result type of this bitcast. This would eliminate at least one
13196 // bitcast. See the transform in InstCombine.
13197 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13198 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13199 if (!(SV0 && SV1))
13200 return SDValue();
13202 int MaskScale =
13203 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13204 SmallVector<int, 8> NewMask;
13205 for (int M : SVN->getMask())
13206 for (int i = 0; i != MaskScale; ++i)
13207 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13209 SDValue LegalShuffle =
13210 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13211 if (LegalShuffle)
13212 return LegalShuffle;
13215 return SDValue();
13218 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13219 EVT VT = N->getValueType(0);
13220 return CombineConsecutiveLoads(N, VT);
13223 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13224 SDValue N0 = N->getOperand(0);
13226 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13227 return N0;
13229 return SDValue();
13232 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13233 /// operands. DstEltVT indicates the destination element value type.
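/// For example, bitcasting (v2i16 build_vector 0x1234, 0x5678) to v1i32 yields
/// (v1i32 build_vector 0x56781234) on a little-endian target.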
13234 SDValue DAGCombiner::
13235 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13236 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13238 // If this is already the right type, we're done.
13239 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13241 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13242 unsigned DstBitSize = DstEltVT.getSizeInBits();
13244 // If this is a conversion of N elements of one type to N elements of another
13245 // type, convert each element. This handles FP<->INT cases.
13246 if (SrcBitSize == DstBitSize) {
13247 SmallVector<SDValue, 8> Ops;
13248 for (SDValue Op : BV->op_values()) {
13249 // If the vector element type is not legal, the BUILD_VECTOR operands
13250 // are promoted and implicitly truncated. Make that explicit here.
13251 if (Op.getValueType() != SrcEltVT)
13252 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13253 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13254 AddToWorklist(Ops.back().getNode());
13256 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13257 BV->getValueType(0).getVectorNumElements());
13258 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13261 // Otherwise, we're growing or shrinking the elements. To avoid having to
13262 // handle annoying details of growing/shrinking FP values, we convert them to
13263 // int first.
13264 if (SrcEltVT.isFloatingPoint()) {
13265 // Convert the input float vector to an int vector where the elements are the
13266 // same size.
13267 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13268 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13269 SrcEltVT = IntVT;
13272 // Now we know the input is an integer vector. If the output is a FP type,
13273 // convert to integer first, then to FP of the right size.
13274 if (DstEltVT.isFloatingPoint()) {
13275 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13276 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13278 // Next, convert to FP elements of the same size.
13279 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13282 // Okay, we know the src/dst types are both integers of differing types.
13283 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13285 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13286 // BuildVectorSDNode?
13287 auto *BVN = cast<BuildVectorSDNode>(BV);
13289 // Extract the constant raw bit data.
13290 BitVector UndefElements;
13291 SmallVector<APInt> RawBits;
13292 bool IsLE = DAG.getDataLayout().isLittleEndian();
13293 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13294 return SDValue();
13296 SDLoc DL(BV);
13297 SmallVector<SDValue, 8> Ops;
13298 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13299 if (UndefElements[I])
13300 Ops.push_back(DAG.getUNDEF(DstEltVT));
13301 else
13302 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13305 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13306 return DAG.getBuildVector(VT, DL, Ops);
13309 // Returns true if floating point contraction is allowed on the FMUL-SDValue
13310 // `N`
13311 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13312 assert(N.getOpcode() == ISD::FMUL);
13314 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13315 N->getFlags().hasAllowContract();
13318 // Returns true if `N` can assume that no infinities are involved in its computation.
13319 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13320 return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
13323 /// Try to perform FMA combining on a given FADD node.
13324 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13325 SDValue N0 = N->getOperand(0);
13326 SDValue N1 = N->getOperand(1);
13327 EVT VT = N->getValueType(0);
13328 SDLoc SL(N);
13330 const TargetOptions &Options = DAG.getTarget().Options;
13332 // Floating-point multiply-add with intermediate rounding.
13333 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13335 // Floating-point multiply-add without intermediate rounding.
13336 bool HasFMA =
13337 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13338 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13340 // No valid opcode, do not combine.
13341 if (!HasFMAD && !HasFMA)
13342 return SDValue();
13344 bool CanReassociate =
13345 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13346 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13347 Options.UnsafeFPMath || HasFMAD);
13348 // If the addition is not contractable, do not combine.
13349 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13350 return SDValue();
13352 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13353 return SDValue();
13355 // Always prefer FMAD to FMA for precision.
13356 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13357 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13359 auto isFusedOp = [&](SDValue N) {
13360 unsigned Opcode = N.getOpcode();
13361 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13364 // Is the node an FMUL and contractable either due to global flags or
13365 // SDNodeFlags.
13366 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13367 if (N.getOpcode() != ISD::FMUL)
13368 return false;
13369 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13371 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13372 // prefer to fold the multiply with fewer uses.
13373 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13374 if (N0.getNode()->use_size() > N1.getNode()->use_size())
13375 std::swap(N0, N1);
13378 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13379 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13380 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13381 N0.getOperand(1), N1);
13384 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13385 // Note: Commutes FADD operands.
13386 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13387 return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13388 N1.getOperand(1), N0);
13391 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13392 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13393 // This requires reassociation because it changes the order of operations.
13394 SDValue FMA, E;
13395 if (CanReassociate && isFusedOp(N0) &&
13396 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13397 N0.getOperand(2).hasOneUse()) {
13398 FMA = N0;
13399 E = N1;
13400 } else if (CanReassociate && isFusedOp(N1) &&
13401 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13402 N1.getOperand(2).hasOneUse()) {
13403 FMA = N1;
13404 E = N0;
13406 if (FMA && E) {
13407 SDValue A = FMA.getOperand(0);
13408 SDValue B = FMA.getOperand(1);
13409 SDValue C = FMA.getOperand(2).getOperand(0);
13410 SDValue D = FMA.getOperand(2).getOperand(1);
13411 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13412 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13415 // Look through FP_EXTEND nodes to do more combining.
13417 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13418 if (N0.getOpcode() == ISD::FP_EXTEND) {
13419 SDValue N00 = N0.getOperand(0);
13420 if (isContractableFMUL(N00) &&
13421 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13422 N00.getValueType())) {
13423 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13424 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13425 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13426 N1);
13430 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13431 // Note: Commutes FADD operands.
13432 if (N1.getOpcode() == ISD::FP_EXTEND) {
13433 SDValue N10 = N1.getOperand(0);
13434 if (isContractableFMUL(N10) &&
13435 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13436 N10.getValueType())) {
13437 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13438 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13439 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13440 N0);
13444 // More folding opportunities when target permits.
13445 if (Aggressive) {
13446 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13447 // -> (fma x, y, (fma (fpext u), (fpext v), z))
13448 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13449 SDValue Z) {
13450 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13451 DAG.getNode(PreferredFusedOpcode, SL, VT,
13452 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13453 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13454 Z));
13456 if (isFusedOp(N0)) {
13457 SDValue N02 = N0.getOperand(2);
13458 if (N02.getOpcode() == ISD::FP_EXTEND) {
13459 SDValue N020 = N02.getOperand(0);
13460 if (isContractableFMUL(N020) &&
13461 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13462 N020.getValueType())) {
13463 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13464 N020.getOperand(0), N020.getOperand(1),
13465 N1);
13470 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13471 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13472 // FIXME: This turns two single-precision and one double-precision
13473 // operation into two double-precision operations, which might not be
13474 // interesting for all targets, especially GPUs.
13475 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13476 SDValue Z) {
13477 return DAG.getNode(
13478 PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13479 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13480 DAG.getNode(PreferredFusedOpcode, SL, VT,
13481 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13482 DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13484 if (N0.getOpcode() == ISD::FP_EXTEND) {
13485 SDValue N00 = N0.getOperand(0);
13486 if (isFusedOp(N00)) {
13487 SDValue N002 = N00.getOperand(2);
13488 if (isContractableFMUL(N002) &&
13489 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13490 N00.getValueType())) {
13491 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13492 N002.getOperand(0), N002.getOperand(1),
13493 N1);
13498 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13499 // -> (fma y, z, (fma (fpext u), (fpext v), x))
13500 if (isFusedOp(N1)) {
13501 SDValue N12 = N1.getOperand(2);
13502 if (N12.getOpcode() == ISD::FP_EXTEND) {
13503 SDValue N120 = N12.getOperand(0);
13504 if (isContractableFMUL(N120) &&
13505 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13506 N120.getValueType())) {
13507 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13508 N120.getOperand(0), N120.getOperand(1),
13509 N0);
13514 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13515 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13516 // FIXME: This turns two single-precision and one double-precision
13517 // operation into two double-precision operations, which might not be
13518 // interesting for all targets, especially GPUs.
13519 if (N1.getOpcode() == ISD::FP_EXTEND) {
13520 SDValue N10 = N1.getOperand(0);
13521 if (isFusedOp(N10)) {
13522 SDValue N102 = N10.getOperand(2);
13523 if (isContractableFMUL(N102) &&
13524 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13525 N10.getValueType())) {
13526 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13527 N102.getOperand(0), N102.getOperand(1),
13528 N0);
13534 return SDValue();
13537 /// Try to perform FMA combining on a given FSUB node.
13538 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13539 SDValue N0 = N->getOperand(0);
13540 SDValue N1 = N->getOperand(1);
13541 EVT VT = N->getValueType(0);
13542 SDLoc SL(N);
13544 const TargetOptions &Options = DAG.getTarget().Options;
13545 // Floating-point multiply-add with intermediate rounding.
13546 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13548 // Floating-point multiply-add without intermediate rounding.
13549 bool HasFMA =
13550 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13551 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13553 // No valid opcode, do not combine.
13554 if (!HasFMAD && !HasFMA)
13555 return SDValue();
13557 const SDNodeFlags Flags = N->getFlags();
13558 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13559 Options.UnsafeFPMath || HasFMAD);
13561 // If the subtraction is not contractable, do not combine.
13562 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13563 return SDValue();
13565 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13566 return SDValue();
13568 // Always prefer FMAD to FMA for precision.
13569 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13570 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13571 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13573 // Is the node an FMUL and contractable either due to global flags or
13574 // SDNodeFlags.
13575 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13576 if (N.getOpcode() != ISD::FMUL)
13577 return false;
13578 return AllowFusionGlobally || N->getFlags().hasAllowContract();
13581 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13582 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13583 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13584 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13585 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13587 return SDValue();
13590 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13591 // Note: Commutes FSUB operands.
13592 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13593 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13594 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13595 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13596 YZ.getOperand(1), X);
13598 return SDValue();
13601 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13602 // prefer to fold the multiply with fewer uses.
13603 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13604 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13605 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13606 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13607 return V;
13608 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13609 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13610 return V;
13611 } else {
13612 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13613 if (SDValue V = tryToFoldXYSubZ(N0, N1))
13614 return V;
13615 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13616 if (SDValue V = tryToFoldXSubYZ(N0, N1))
13617 return V;
13620 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13621 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13622 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13623 SDValue N00 = N0.getOperand(0).getOperand(0);
13624 SDValue N01 = N0.getOperand(0).getOperand(1);
13625 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13626 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13627 DAG.getNode(ISD::FNEG, SL, VT, N1));
13630 // Look through FP_EXTEND nodes to do more combining.
13632 // fold (fsub (fpext (fmul x, y)), z)
13633 // -> (fma (fpext x), (fpext y), (fneg z))
13634 if (N0.getOpcode() == ISD::FP_EXTEND) {
13635 SDValue N00 = N0.getOperand(0);
13636 if (isContractableFMUL(N00) &&
13637 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13638 N00.getValueType())) {
13639 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13640 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13641 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13642 DAG.getNode(ISD::FNEG, SL, VT, N1));
13646 // fold (fsub x, (fpext (fmul y, z)))
13647 // -> (fma (fneg (fpext y)), (fpext z), x)
13648 // Note: Commutes FSUB operands.
13649 if (N1.getOpcode() == ISD::FP_EXTEND) {
13650 SDValue N10 = N1.getOperand(0);
13651 if (isContractableFMUL(N10) &&
13652 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13653 N10.getValueType())) {
13654 return DAG.getNode(
13655 PreferredFusedOpcode, SL, VT,
13656 DAG.getNode(ISD::FNEG, SL, VT,
13657 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13658 DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13662 // fold (fsub (fpext (fneg (fmul x, y))), z)
13663 // -> (fneg (fma (fpext x), (fpext y), z))
13664 // Note: This could be removed with appropriate canonicalization of the
13665 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13666 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13667 // from implementing the canonicalization in visitFSUB.
13668 if (N0.getOpcode() == ISD::FP_EXTEND) {
13669 SDValue N00 = N0.getOperand(0);
13670 if (N00.getOpcode() == ISD::FNEG) {
13671 SDValue N000 = N00.getOperand(0);
13672 if (isContractableFMUL(N000) &&
13673 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13674 N00.getValueType())) {
13675 return DAG.getNode(
13676 ISD::FNEG, SL, VT,
13677 DAG.getNode(PreferredFusedOpcode, SL, VT,
13678 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13679 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13680 N1));
13685 // fold (fsub (fneg (fpext (fmul x, y))), z)
13686 // -> (fneg (fma (fpext x), (fpext y), z))
13687 // Note: This could be removed with appropriate canonicalization of the
13688 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13689 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
13690 // from implementing the canonicalization in visitFSUB.
13691 if (N0.getOpcode() == ISD::FNEG) {
13692 SDValue N00 = N0.getOperand(0);
13693 if (N00.getOpcode() == ISD::FP_EXTEND) {
13694 SDValue N000 = N00.getOperand(0);
13695 if (isContractableFMUL(N000) &&
13696 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13697 N000.getValueType())) {
13698 return DAG.getNode(
13699 ISD::FNEG, SL, VT,
13700 DAG.getNode(PreferredFusedOpcode, SL, VT,
13701 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13702 DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13703 N1));
13708 auto isReassociable = [Options](SDNode *N) {
13709 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13712 auto isContractableAndReassociableFMUL = [isContractableFMUL,
13713 isReassociable](SDValue N) {
13714 return isContractableFMUL(N) && isReassociable(N.getNode());
13717 auto isFusedOp = [&](SDValue N) {
13718 unsigned Opcode = N.getOpcode();
13719 return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13722 // More folding opportunities when target permits.
13723 if (Aggressive && isReassociable(N)) {
13724 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13725 // fold (fsub (fma x, y, (fmul u, v)), z)
13726 // -> (fma x, y, (fma u, v, (fneg z)))
13727 if (CanFuse && isFusedOp(N0) &&
13728 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13729 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13730 return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13731 N0.getOperand(1),
13732 DAG.getNode(PreferredFusedOpcode, SL, VT,
13733 N0.getOperand(2).getOperand(0),
13734 N0.getOperand(2).getOperand(1),
13735 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13738 // fold (fsub x, (fma y, z, (fmul u, v)))
13739 // -> (fma (fneg y), z, (fma (fneg u), v, x))
13740 if (CanFuse && isFusedOp(N1) &&
13741 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13742 N1->hasOneUse() && NoSignedZero) {
13743 SDValue N20 = N1.getOperand(2).getOperand(0);
13744 SDValue N21 = N1.getOperand(2).getOperand(1);
13745 return DAG.getNode(
13746 PreferredFusedOpcode, SL, VT,
13747 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13748 DAG.getNode(PreferredFusedOpcode, SL, VT,
13749 DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13752 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13753 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13754 if (isFusedOp(N0) && N0->hasOneUse()) {
13755 SDValue N02 = N0.getOperand(2);
13756 if (N02.getOpcode() == ISD::FP_EXTEND) {
13757 SDValue N020 = N02.getOperand(0);
13758 if (isContractableAndReassociableFMUL(N020) &&
13759 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13760 N020.getValueType())) {
13761 return DAG.getNode(
13762 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13763 DAG.getNode(
13764 PreferredFusedOpcode, SL, VT,
13765 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13766 DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13767 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13772 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13773 // -> (fma (fpext x), (fpext y),
13774 // (fma (fpext u), (fpext v), (fneg z)))
13775 // FIXME: This turns two single-precision and one double-precision
13776 // operation into two double-precision operations, which might not be
13777 // interesting for all targets, especially GPUs.
13778 if (N0.getOpcode() == ISD::FP_EXTEND) {
13779 SDValue N00 = N0.getOperand(0);
13780 if (isFusedOp(N00)) {
13781 SDValue N002 = N00.getOperand(2);
13782 if (isContractableAndReassociableFMUL(N002) &&
13783 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13784 N00.getValueType())) {
13785 return DAG.getNode(
13786 PreferredFusedOpcode, SL, VT,
13787 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13788 DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13789 DAG.getNode(
13790 PreferredFusedOpcode, SL, VT,
13791 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13792 DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13793 DAG.getNode(ISD::FNEG, SL, VT, N1)));
13798 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13799 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13800 if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13801 N1->hasOneUse()) {
13802 SDValue N120 = N1.getOperand(2).getOperand(0);
13803 if (isContractableAndReassociableFMUL(N120) &&
13804 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13805 N120.getValueType())) {
13806 SDValue N1200 = N120.getOperand(0);
13807 SDValue N1201 = N120.getOperand(1);
13808 return DAG.getNode(
13809 PreferredFusedOpcode, SL, VT,
13810 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13811 DAG.getNode(PreferredFusedOpcode, SL, VT,
13812 DAG.getNode(ISD::FNEG, SL, VT,
13813 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13814 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13818 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13819 // -> (fma (fneg (fpext y)), (fpext z),
13820 // (fma (fneg (fpext u)), (fpext v), x))
13821 // FIXME: This turns two single-precision and one double-precision
13822 // operation into two double-precision operations, which might not be
13823 // interesting for all targets, especially GPUs.
13824 if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
13825 SDValue CvtSrc = N1.getOperand(0);
13826 SDValue N100 = CvtSrc.getOperand(0);
13827 SDValue N101 = CvtSrc.getOperand(1);
13828 SDValue N102 = CvtSrc.getOperand(2);
13829 if (isContractableAndReassociableFMUL(N102) &&
13830 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13831 CvtSrc.getValueType())) {
13832 SDValue N1020 = N102.getOperand(0);
13833 SDValue N1021 = N102.getOperand(1);
13834 return DAG.getNode(
13835 PreferredFusedOpcode, SL, VT,
13836 DAG.getNode(ISD::FNEG, SL, VT,
13837 DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13838 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13839 DAG.getNode(PreferredFusedOpcode, SL, VT,
13840 DAG.getNode(ISD::FNEG, SL, VT,
13841 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13842 DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13847 return SDValue();
13850 /// Try to perform FMA combining on a given FMUL node based on the distributive
13851 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13852 /// subtraction instead of addition).
13853 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13854 SDValue N0 = N->getOperand(0);
13855 SDValue N1 = N->getOperand(1);
13856 EVT VT = N->getValueType(0);
13857 SDLoc SL(N);
13859 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13861 const TargetOptions &Options = DAG.getTarget().Options;
13863 // The transforms below are incorrect when x == 0 and y == inf, because the
13864 // intermediate multiplication produces a nan.
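// For example, for (fmul (fadd x, 1.0), y) with x = 0.0 and y = +inf: the
// unfused form computes (0.0 + 1.0) * +inf = +inf, while the fused form
// (fma x, y, y) computes 0.0 * +inf + +inf = NaN.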
13865 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
13866 if (!hasNoInfs(Options, FAdd))
13867 return SDValue();
13869 // Floating-point multiply-add without intermediate rounding.
13870 bool HasFMA =
13871 isContractableFMUL(Options, SDValue(N, 0)) &&
13872 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13873 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13875 // Floating-point multiply-add with intermediate rounding. This can result
13876 // in a less precise result due to the changed rounding order.
13877 bool HasFMAD = Options.UnsafeFPMath &&
13878 (LegalOperations && TLI.isFMADLegal(DAG, N));
13880 // No valid opcode, do not combine.
13881 if (!HasFMAD && !HasFMA)
13882 return SDValue();
13884 // Always prefer FMAD to FMA for precision.
13885 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13886 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13888 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13889 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13890 auto FuseFADD = [&](SDValue X, SDValue Y) {
13891 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13892 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13893 if (C->isExactlyValue(+1.0))
13894 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13896 if (C->isExactlyValue(-1.0))
13897 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13898 DAG.getNode(ISD::FNEG, SL, VT, Y));
13901 return SDValue();
13904 if (SDValue FMA = FuseFADD(N0, N1))
13905 return FMA;
13906 if (SDValue FMA = FuseFADD(N1, N0))
13907 return FMA;
13909 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13910 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13911 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13912 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
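// These follow from distributing y, e.g. (fsub +1.0, x1) * y == y - x1 * y
// == (fma (fneg x1), y, y).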
13913 auto FuseFSUB = [&](SDValue X, SDValue Y) {
13914 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13915 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13916 if (C0->isExactlyValue(+1.0))
13917 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13918 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13920 if (C0->isExactlyValue(-1.0))
13921 return DAG.getNode(PreferredFusedOpcode, SL, VT,
13922 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13923 DAG.getNode(ISD::FNEG, SL, VT, Y));
13925 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13926 if (C1->isExactlyValue(+1.0))
13927 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13928 DAG.getNode(ISD::FNEG, SL, VT, Y));
13929 if (C1->isExactlyValue(-1.0))
13930 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13934 return SDValue();
13937 if (SDValue FMA = FuseFSUB(N0, N1))
13938 return FMA;
13939 if (SDValue FMA = FuseFSUB(N1, N0))
13940 return FMA;
13942 return SDValue();
13945 SDValue DAGCombiner::visitFADD(SDNode *N) {
13946 SDValue N0 = N->getOperand(0);
13947 SDValue N1 = N->getOperand(1);
13948 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13949 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13950 EVT VT = N->getValueType(0);
13951 SDLoc DL(N);
13952 const TargetOptions &Options = DAG.getTarget().Options;
13953 SDNodeFlags Flags = N->getFlags();
13954 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13956 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13957 return R;
13959 // fold (fadd c1, c2) -> c1 + c2
13960 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
13961 return C;
13963 // canonicalize constant to RHS
13964 if (N0CFP && !N1CFP)
13965 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13967 // fold vector ops
13968 if (VT.isVector())
13969 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
13970 return FoldedVOp;
13972 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13973 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13974 if (N1C && N1C->isZero())
13975 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13976 return N0;
13978 if (SDValue NewSel = foldBinOpIntoSelect(N))
13979 return NewSel;
13981 // fold (fadd A, (fneg B)) -> (fsub A, B)
13982 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13983 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13984 N1, DAG, LegalOperations, ForCodeSize))
13985 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13987 // fold (fadd (fneg A), B) -> (fsub B, A)
13988 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13989 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13990 N0, DAG, LegalOperations, ForCodeSize))
13991 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13993 auto isFMulNegTwo = [](SDValue FMul) {
13994 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13995 return false;
13996 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13997 return C && C->isExactlyValue(-2.0);
14000 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
14001 if (isFMulNegTwo(N0)) {
14002 SDValue B = N0.getOperand(0);
14003 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14004 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14006 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14007 if (isFMulNegTwo(N1)) {
14008 SDValue B = N1.getOperand(0);
14009 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14010 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
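// In both cases A + B * -2.0 is rewritten as A - (B + B), trading the
// multiply by a constant for an extra add.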
14013 // No FP constant should be created after legalization, as the Instruction
14014 // Selection pass has a hard time dealing with FP constants.
14015 bool AllowNewConst = (Level < AfterLegalizeDAG);
14017 // If nnan is enabled, fold lots of things.
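// (nnan is needed because x = +/-inf or NaN would make (fneg x) + x produce
// NaN rather than zero.)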
14018 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14019 // If allowed, fold (fadd (fneg x), x) -> 0.0
14020 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14021 return DAG.getConstantFP(0.0, DL, VT);
14023 // If allowed, fold (fadd x, (fneg x)) -> 0.0
14024 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14025 return DAG.getConstantFP(0.0, DL, VT);
14028 // If 'unsafe math' is enabled, or if we have both reassoc and nsz, fold lots of things.
14029 // TODO: break out portions of the transformations below for which Unsafe is
14030 // considered and which do not require both nsz and reassoc.
14031 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14032 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14033 AllowNewConst) {
14034 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14035 if (N1CFP && N0.getOpcode() == ISD::FADD &&
14036 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14037 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14038 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14041 // We can fold chains of FADD's of the same value into multiplications.
14042 // This transform is not safe in general because we are reducing the number
14043 // of rounding steps.
14044 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14045 if (N0.getOpcode() == ISD::FMUL) {
14046 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14047 bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14049 // (fadd (fmul x, c), x) -> (fmul x, c+1)
14050 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14051 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14052 DAG.getConstantFP(1.0, DL, VT));
14053 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14056 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14057 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14058 N1.getOperand(0) == N1.getOperand(1) &&
14059 N0.getOperand(0) == N1.getOperand(0)) {
14060 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14061 DAG.getConstantFP(2.0, DL, VT));
14062 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14066 if (N1.getOpcode() == ISD::FMUL) {
14067 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14068 bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14070 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14071 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14072 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14073 DAG.getConstantFP(1.0, DL, VT));
14074 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14077 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14078 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14079 N0.getOperand(0) == N0.getOperand(1) &&
14080 N1.getOperand(0) == N0.getOperand(0)) {
14081 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14082 DAG.getConstantFP(2.0, DL, VT));
14083 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14087 if (N0.getOpcode() == ISD::FADD) {
14088 bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14089 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14090 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14091 (N0.getOperand(0) == N1)) {
14092 return DAG.getNode(ISD::FMUL, DL, VT, N1,
14093 DAG.getConstantFP(3.0, DL, VT));
14097 if (N1.getOpcode() == ISD::FADD) {
14098 bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14099 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14100 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14101 N1.getOperand(0) == N0) {
14102 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14103 DAG.getConstantFP(3.0, DL, VT));
14107 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14108 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14109 N0.getOperand(0) == N0.getOperand(1) &&
14110 N1.getOperand(0) == N1.getOperand(1) &&
14111 N0.getOperand(0) == N1.getOperand(0)) {
14112 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14113 DAG.getConstantFP(4.0, DL, VT));
14116 } // enable-unsafe-fp-math
14118 // FADD -> FMA combines:
14119 if (SDValue Fused = visitFADDForFMACombine(N)) {
14120 AddToWorklist(Fused.getNode());
14121 return Fused;
14123 return SDValue();
14126 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14127 SDValue Chain = N->getOperand(0);
14128 SDValue N0 = N->getOperand(1);
14129 SDValue N1 = N->getOperand(2);
14130 EVT VT = N->getValueType(0);
14131 EVT ChainVT = N->getValueType(1);
14132 SDLoc DL(N);
14133 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14135 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14136 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14137 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14138 N1, DAG, LegalOperations, ForCodeSize)) {
14139 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14140 {Chain, N0, NegN1});
14143 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14144 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14145 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14146 N0, DAG, LegalOperations, ForCodeSize)) {
14147 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14148 {Chain, N1, NegN0});
14150 return SDValue();
14153 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14154 SDValue N0 = N->getOperand(0);
14155 SDValue N1 = N->getOperand(1);
14156 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14157 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14158 EVT VT = N->getValueType(0);
14159 SDLoc DL(N);
14160 const TargetOptions &Options = DAG.getTarget().Options;
14161 const SDNodeFlags Flags = N->getFlags();
14162 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14164 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14165 return R;
14167 // fold (fsub c1, c2) -> c1-c2
14168 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14169 return C;
14171 // fold vector ops
14172 if (VT.isVector())
14173 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14174 return FoldedVOp;
14176 if (SDValue NewSel = foldBinOpIntoSelect(N))
14177 return NewSel;
14179 // (fsub A, 0) -> A
14180 if (N1CFP && N1CFP->isZero()) {
14181 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14182 Flags.hasNoSignedZeros()) {
14183 return N0;
14187 if (N0 == N1) {
14188 // (fsub x, x) -> 0.0
14189 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14190 return DAG.getConstantFP(0.0f, DL, VT);
14193 // (fsub -0.0, N1) -> -N1
14194 if (N0CFP && N0CFP->isZero()) {
14195 if (N0CFP->isNegative() ||
14196 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14197 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14198 // flushed to zero, unless all users treat denorms as zero (DAZ).
14199 // FIXME: This transform will change the sign of a NaN and the behavior
14200 // of a signaling NaN. It is only valid when a NoNaN flag is present.
14201 DenormalMode DenormMode = DAG.getDenormalMode(VT);
14202 if (DenormMode == DenormalMode::getIEEE()) {
14203 if (SDValue NegN1 =
14204 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14205 return NegN1;
14206 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14207 return DAG.getNode(ISD::FNEG, DL, VT, N1);
14212 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14213 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14214 N1.getOpcode() == ISD::FADD) {
14215 // X - (X + Y) -> -Y
14216 if (N0 == N1->getOperand(0))
14217 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14218 // X - (Y + X) -> -Y
14219 if (N0 == N1->getOperand(1))
14220 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14223 // fold (fsub A, (fneg B)) -> (fadd A, B)
14224 if (SDValue NegN1 =
14225 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14226 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14228 // FSUB -> FMA combines:
14229 if (SDValue Fused = visitFSUBForFMACombine(N)) {
14230 AddToWorklist(Fused.getNode());
14231 return Fused;
14234 return SDValue();
14237 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14238 SDValue N0 = N->getOperand(0);
14239 SDValue N1 = N->getOperand(1);
14240 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14241 EVT VT = N->getValueType(0);
14242 SDLoc DL(N);
14243 const TargetOptions &Options = DAG.getTarget().Options;
14244 const SDNodeFlags Flags = N->getFlags();
14245 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14247 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14248 return R;
14250 // fold (fmul c1, c2) -> c1*c2
14251 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14252 return C;
14254 // canonicalize constant to RHS
14255 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14256 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14257 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14259 // fold vector ops
14260 if (VT.isVector())
14261 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14262 return FoldedVOp;
14264 if (SDValue NewSel = foldBinOpIntoSelect(N))
14265 return NewSel;
14267 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14268 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14269 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14270 N0.getOpcode() == ISD::FMUL) {
14271 SDValue N00 = N0.getOperand(0);
14272 SDValue N01 = N0.getOperand(1);
14273 // Avoid an infinite loop by making sure that N00 is not a constant
14274 // (the inner multiply has not been constant folded yet).
14275 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14276 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14277 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14278 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14282 // Match a special case: we convert X * 2.0 into fadd.
14283 // fmul (fadd X, X), C -> fmul X, 2.0 * C
14284 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14285 N0.getOperand(0) == N0.getOperand(1)) {
14286 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14287 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14288 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14292 // fold (fmul X, 2.0) -> (fadd X, X)
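// (X + X is exactly 2.0 * X for every input, including infinities and NaNs,
// so no fast-math flags are required for this fold.)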
14293 if (N1CFP && N1CFP->isExactlyValue(+2.0))
14294 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14296 // fold (fmul X, -1.0) -> (fsub -0.0, X)
14297 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14298 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14299 return DAG.getNode(ISD::FSUB, DL, VT,
14300 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14304 // -N0 * -N1 --> N0 * N1
14305 TargetLowering::NegatibleCost CostN0 =
14306 TargetLowering::NegatibleCost::Expensive;
14307 TargetLowering::NegatibleCost CostN1 =
14308 TargetLowering::NegatibleCost::Expensive;
14309 SDValue NegN0 =
14310 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14311 SDValue NegN1 =
14312 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14313 if (NegN0 && NegN1 &&
14314 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14315 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14316 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14318 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14319 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14320 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14321 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14322 TLI.isOperationLegal(ISD::FABS, VT)) {
14323 SDValue Select = N0, X = N1;
14324 if (Select.getOpcode() != ISD::SELECT)
14325 std::swap(Select, X);
14327 SDValue Cond = Select.getOperand(0);
14328 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14329 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14331 if (TrueOpnd && FalseOpnd &&
14332 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14333 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14334 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14335 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14336 switch (CC) {
14337 default: break;
14338 case ISD::SETOLT:
14339 case ISD::SETULT:
14340 case ISD::SETOLE:
14341 case ISD::SETULE:
14342 case ISD::SETLT:
14343 case ISD::SETLE:
14344 std::swap(TrueOpnd, FalseOpnd);
14345 LLVM_FALLTHROUGH;
14346 case ISD::SETOGT:
14347 case ISD::SETUGT:
14348 case ISD::SETOGE:
14349 case ISD::SETUGE:
14350 case ISD::SETGT:
14351 case ISD::SETGE:
14352 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14353 TLI.isOperationLegal(ISD::FNEG, VT))
14354 return DAG.getNode(ISD::FNEG, DL, VT,
14355 DAG.getNode(ISD::FABS, DL, VT, X));
14356 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14357 return DAG.getNode(ISD::FABS, DL, VT, X);
14359 break;
14364 // FMUL -> FMA combines:
14365 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14366 AddToWorklist(Fused.getNode());
14367 return Fused;
14370 return SDValue();
14373 SDValue DAGCombiner::visitFMA(SDNode *N) {
14374 SDValue N0 = N->getOperand(0);
14375 SDValue N1 = N->getOperand(1);
14376 SDValue N2 = N->getOperand(2);
14377 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14378 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14379 EVT VT = N->getValueType(0);
14380 SDLoc DL(N);
14381 const TargetOptions &Options = DAG.getTarget().Options;
14382 // FMA nodes have flags that propagate to the created nodes.
14383 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14385 bool UnsafeFPMath =
14386 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14388 // Constant fold FMA.
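// (Re-creating the node is enough here: getNode() itself constant-folds an
// FMA whose three operands are all ConstantFP.)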
14389 if (isa<ConstantFPSDNode>(N0) &&
14390 isa<ConstantFPSDNode>(N1) &&
14391 isa<ConstantFPSDNode>(N2)) {
14392 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14395 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14396 TargetLowering::NegatibleCost CostN0 =
14397 TargetLowering::NegatibleCost::Expensive;
14398 TargetLowering::NegatibleCost CostN1 =
14399 TargetLowering::NegatibleCost::Expensive;
14400 SDValue NegN0 =
14401 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14402 SDValue NegN1 =
14403 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14404 if (NegN0 && NegN1 &&
14405 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14406 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14407 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14409 if (UnsafeFPMath) {
14410 if (N0CFP && N0CFP->isZero())
14411 return N2;
14412 if (N1CFP && N1CFP->isZero())
14413 return N2;
14416 if (N0CFP && N0CFP->isExactlyValue(1.0))
14417 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14418 if (N1CFP && N1CFP->isExactlyValue(1.0))
14419 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14421 // Canonicalize (fma c, x, y) -> (fma x, c, y)
14422 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14423 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14424 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14426 if (UnsafeFPMath) {
14427 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14428 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14429 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14430 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14431 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14432 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14435 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14436 if (N0.getOpcode() == ISD::FMUL &&
14437 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14438 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14439 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14440 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14441 N2);
14445 // (fma x, 1, y) -> (fadd x, y); (fma x, -1, y) -> (fadd (fneg x), y)
14446 if (N1CFP) {
14447 if (N1CFP->isExactlyValue(1.0))
14448 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14450 if (N1CFP->isExactlyValue(-1.0) &&
14451 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14452 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14453 AddToWorklist(RHSNeg.getNode());
14454 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14457 // fma (fneg x), K, y -> fma x, -K, y
14458 if (N0.getOpcode() == ISD::FNEG &&
14459 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14460 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14461 ForCodeSize)))) {
14462 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14463 DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14467 if (UnsafeFPMath) {
14468 // (fma x, c, x) -> (fmul x, (c+1))
14469 if (N1CFP && N0 == N2) {
14470 return DAG.getNode(
14471 ISD::FMUL, DL, VT, N0,
14472 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14475 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14476 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14477 return DAG.getNode(
14478 ISD::FMUL, DL, VT, N0,
14479 DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14483 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14484 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14485 if (!TLI.isFNegFree(VT))
14486 if (SDValue Neg = TLI.getCheaperNegatedExpression(
14487 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14488 return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14489 return SDValue();
14492 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14493 // reciprocal.
14494 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14495 // Notice that this is not always beneficial. One reason is different targets
14496 // may have different costs for FDIV and FMUL, so sometimes the cost of two
14497 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14498 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14499 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14500 // TODO: Limit this transform based on optsize/minsize - it always creates at
14501 // least 1 extra instruction. But the perf win may be substantial enough
14502 // that only minsize should restrict this.
14503 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14504 const SDNodeFlags Flags = N->getFlags();
14505 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14506 return SDValue();
14508 // Skip if current node is a reciprocal/fneg-reciprocal.
14509 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14510 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14511 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14512 return SDValue();
14514 // Exit early if the target does not want this transform or if there can't
14515 // possibly be enough uses of the divisor to make the transform worthwhile.
14516 unsigned MinUses = TLI.combineRepeatedFPDivisors();
14518 // For splat vectors, scale the number of uses by the splat factor. If we can
14519 // convert the division into a scalar op, that will likely be much faster.
14520 unsigned NumElts = 1;
14521 EVT VT = N->getValueType(0);
14522 if (VT.isVector() && DAG.isSplatValue(N1))
14523 NumElts = VT.getVectorNumElements();
14525 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14526 return SDValue();
14528 // Find all FDIV users of the same divisor.
14529 // Use a set because duplicates may be present in the user list.
14530 SetVector<SDNode *> Users;
14531 for (auto *U : N1->uses()) {
14532 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14533 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14534 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14535 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14536 U->getFlags().hasAllowReassociation() &&
14537 U->getFlags().hasNoSignedZeros())
14538 continue;
14540 // This division is eligible for optimization only if global unsafe math
14541 // is enabled or if this division allows reciprocal formation.
14542 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14543 Users.insert(U);
14547 // Now that we have the actual number of divisor uses, make sure it meets
14548 // the minimum threshold specified by the target.
14549 if ((Users.size() * NumElts) < MinUses)
14550 return SDValue();
14552 SDLoc DL(N);
14553 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14554 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14556 // Dividend / Divisor -> Dividend * Reciprocal
14557 for (auto *U : Users) {
14558 SDValue Dividend = U->getOperand(0);
14559 if (Dividend != FPOne) {
14560 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14561 Reciprocal, Flags);
14562 CombineTo(U, NewNode);
14563 } else if (U != Reciprocal.getNode()) {
14564 // In the absence of fast-math-flags, this user node is always the
14565 // same node as Reciprocal, but with FMF they may be different nodes.
14566 CombineTo(U, Reciprocal);
14569 return SDValue(N, 0); // N was replaced.
14572 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14573 SDValue N0 = N->getOperand(0);
14574 SDValue N1 = N->getOperand(1);
14575 EVT VT = N->getValueType(0);
14576 SDLoc DL(N);
14577 const TargetOptions &Options = DAG.getTarget().Options;
14578 SDNodeFlags Flags = N->getFlags();
14579 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14581 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14582 return R;
14584 // fold (fdiv c1, c2) -> c1/c2
14585 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14586 return C;
14588 // fold vector ops
14589 if (VT.isVector())
14590 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14591 return FoldedVOp;
14593 if (SDValue NewSel = foldBinOpIntoSelect(N))
14594 return NewSel;
14596 if (SDValue V = combineRepeatedFPDivisors(N))
14597 return V;
14599 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14600 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
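// (For example, X / 8.0 becomes X * 0.125 exactly, while X / 3.0 becomes a
// multiply by the rounded reciprocal, which is why the opInexact case is
// accepted below.)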
14601 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14602 // Compute the reciprocal 1.0 / c2.
14603 const APFloat &N1APF = N1CFP->getValueAPF();
14604 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14605 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14606 // Only do the transform if the reciprocal is a legal fp immediate that
14607 // isn't too nasty (e.g. NaN, denormal, ...).
14608 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14609 (!LegalOperations ||
14610 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14611 // backend)... we should handle this gracefully after Legalize.
14612 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14613 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14614 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14615 return DAG.getNode(ISD::FMUL, DL, VT, N0,
14616 DAG.getConstantFP(Recip, DL, VT));
14619 // If this FDIV is part of a reciprocal square root, it may be folded
14620 // into a target-specific square root estimate instruction.
14621 if (N1.getOpcode() == ISD::FSQRT) {
14622 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14623 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14624 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14625 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14626 if (SDValue RV =
14627 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14628 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14629 AddToWorklist(RV.getNode());
14630 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14632 } else if (N1.getOpcode() == ISD::FP_ROUND &&
14633 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14634 if (SDValue RV =
14635 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14636 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14637 AddToWorklist(RV.getNode());
14638 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14640 } else if (N1.getOpcode() == ISD::FMUL) {
14641 // Look through an FMUL. Even though this won't remove the FDIV directly,
14642 // it's still worthwhile to get rid of the FSQRT if possible.
14643 SDValue Sqrt, Y;
14644 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14645 Sqrt = N1.getOperand(0);
14646 Y = N1.getOperand(1);
14647 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14648 Sqrt = N1.getOperand(1);
14649 Y = N1.getOperand(0);
14651 if (Sqrt.getNode()) {
14652 // If the other multiply operand is known positive, pull it into the
14653 // sqrt. That will eliminate the division if we convert to an estimate.
14654 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14655 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14656 SDValue A;
14657 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14658 A = Y.getOperand(0);
14659 else if (Y == Sqrt.getOperand(0))
14660 A = Y;
14661 if (A) {
14662 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14663 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14664 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14665 SDValue AAZ =
14666 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14667 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14668 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14670 // Estimate creation failed. Clean up speculatively created nodes.
14671 recursivelyDeleteUnusedNodes(AAZ.getNode());
14675 // We found an FSQRT, so try to make this fold:
14676 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14677 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14678 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14679 AddToWorklist(Div.getNode());
14680 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14685 // Fold into a reciprocal estimate and multiply instead of a real divide.
14686 if (Options.NoInfsFPMath || Flags.hasNoInfs())
14687 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14688 return RV;
14691 // Fold X/Sqrt(X) -> Sqrt(X)
14692 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14693 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14694 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14695 return N1;
14697 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14698 TargetLowering::NegatibleCost CostN0 =
14699 TargetLowering::NegatibleCost::Expensive;
14700 TargetLowering::NegatibleCost CostN1 =
14701 TargetLowering::NegatibleCost::Expensive;
14702 SDValue NegN0 =
14703 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14704 SDValue NegN1 =
14705 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14706 if (NegN0 && NegN1 &&
14707 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14708 CostN1 == TargetLowering::NegatibleCost::Cheaper))
14709 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14711 return SDValue();
14714 SDValue DAGCombiner::visitFREM(SDNode *N) {
14715 SDValue N0 = N->getOperand(0);
14716 SDValue N1 = N->getOperand(1);
14717 EVT VT = N->getValueType(0);
14718 SDNodeFlags Flags = N->getFlags();
14719 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14721 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14722 return R;
14724 // fold (frem c1, c2) -> fmod(c1,c2)
14725 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
14726 return C;
14728 if (SDValue NewSel = foldBinOpIntoSelect(N))
14729 return NewSel;
14731 return SDValue();
14734 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14735 SDNodeFlags Flags = N->getFlags();
14736 const TargetOptions &Options = DAG.getTarget().Options;
14738 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14739 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14740 if (!Flags.hasApproximateFuncs() ||
14741 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14742 return SDValue();
14744 SDValue N0 = N->getOperand(0);
14745 if (TLI.isFsqrtCheap(N0, DAG))
14746 return SDValue();
14748 // FSQRT nodes have flags that propagate to the created nodes.
14749 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14750 // transform the fdiv, we may produce a sub-optimal estimate sequence
14751 // because the reciprocal calculation may not have to filter out a
14752 // 0.0 input.
14753 return buildSqrtEstimate(N0, Flags);
14756 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14757 /// copysign(x, fp_round(y)) -> copysign(x, y)
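/// Only the sign bit of the second operand matters, and fpext/fpround leave
/// the sign unchanged, so the cast can usually be looked through (subject to
/// the type restrictions checked below).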
14758 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14759 SDValue N1 = N->getOperand(1);
14760 if ((N1.getOpcode() == ISD::FP_EXTEND ||
14761 N1.getOpcode() == ISD::FP_ROUND)) {
14762 EVT N1VT = N1->getValueType(0);
14763 EVT N1Op0VT = N1->getOperand(0).getValueType();
14765 // Always fold no-op FP casts.
14766 if (N1VT == N1Op0VT)
14767 return true;
14769 // Do not optimize out type conversion of f128 type yet.
14770 // For some targets like x86_64, configuration is changed to keep one f128
14771 // value in one SSE register, but instruction selection cannot handle
14772 // FCOPYSIGN on SSE registers yet.
14773 if (N1Op0VT == MVT::f128)
14774 return false;
14776 // Avoid mismatched vector operand types, for better instruction selection.
14777 if (N1Op0VT.isVector())
14778 return false;
14780 return true;
14782 return false;
14785 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14786 SDValue N0 = N->getOperand(0);
14787 SDValue N1 = N->getOperand(1);
14788 EVT VT = N->getValueType(0);
14790 // constant-fold (fcopysign c1, c2)
14791 if (SDValue C =
14792 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
14793 return C;
14795 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14796 const APFloat &V = N1C->getValueAPF();
14797 // copysign(x, c1) -> fabs(x) iff ispos(c1)
14798 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14799 if (!V.isNegative()) {
14800 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14801 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14802 } else {
14803 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14804 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14805 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14809 // copysign(fabs(x), y) -> copysign(x, y)
14810 // copysign(fneg(x), y) -> copysign(x, y)
14811 // copysign(copysign(x,z), y) -> copysign(x, y)
14812 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14813 N0.getOpcode() == ISD::FCOPYSIGN)
14814 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14816 // copysign(x, abs(y)) -> abs(x)
14817 if (N1.getOpcode() == ISD::FABS)
14818 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14820 // copysign(x, copysign(y,z)) -> copysign(x, z)
14821 if (N1.getOpcode() == ISD::FCOPYSIGN)
14822 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14824 // copysign(x, fp_extend(y)) -> copysign(x, y)
14825 // copysign(x, fp_round(y)) -> copysign(x, y)
14826 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14827 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14829 return SDValue();
14832 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14833 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14834 if (!ExponentC)
14835 return SDValue();
14836 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14838 // Try to convert x ** (1/3) into cube root.
14839 // TODO: Handle the various flavors of long double.
14840 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14841 // Some range near 1/3 should be fine.
14842 EVT VT = N->getValueType(0);
14843 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14844 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14845 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14846 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14847 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
14848 // For regular numbers, rounding may cause the results to differ.
14849 // Therefore, we require { nsz ninf nnan afn } for this transform.
14850 // TODO: We could select out the special cases if we don't have nsz/ninf.
14851 SDNodeFlags Flags = N->getFlags();
14852 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14853 !Flags.hasApproximateFuncs())
14854 return SDValue();
14856 // Do not create a cbrt() libcall if the target does not have it, and do not
14857 // turn a pow that has lowering support into a cbrt() libcall.
14858 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14859 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14860 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14861 return SDValue();
14863 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14866 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14867 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14868 // TODO: This could be extended (using a target hook) to handle smaller
14869 // power-of-2 fractional exponents.
14870 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14871 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14872 if (ExponentIs025 || ExponentIs075) {
14873 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14874 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
14875 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14876 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
14877 // For regular numbers, rounding may cause the results to differ.
14878 // Therefore, we require { nsz ninf afn } for this transform.
14879 // TODO: We could select out the special cases if we don't have nsz/ninf.
14880 SDNodeFlags Flags = N->getFlags();
14882 // We only need no signed zeros for the 0.25 case.
14883 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14884 !Flags.hasApproximateFuncs())
14885 return SDValue();
14887 // Don't double the number of libcalls. We are trying to inline fast code.
14888 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14889 return SDValue();
14891 // Assume that libcalls are the smallest code.
14892 // TODO: This restriction should probably be lifted for vectors.
14893 if (ForCodeSize)
14894 return SDValue();
14896 // pow(X, 0.25) --> sqrt(sqrt(X))
14897 SDLoc DL(N);
14898 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14899 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14900 if (ExponentIs025)
14901 return SqrtSqrt;
14902 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14903 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14906 return SDValue();
14909 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14910 const TargetLowering &TLI) {
14911 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14912 // replacing casts with a libcall. We also must be allowed to ignore -0.0
14913 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14914 // conversions would return +0.0.
14915 // FIXME: We should be able to use node-level FMF here.
14916 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14917 EVT VT = N->getValueType(0);
14918 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14919 !DAG.getTarget().Options.NoSignedZerosFPMath)
14920 return SDValue();
14922 // fptosi/fptoui round towards zero, so converting from FP to integer and
14923 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14924 SDValue N0 = N->getOperand(0);
14925 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14926 N0.getOperand(0).getValueType() == VT)
14927 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14929 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14930 N0.getOperand(0).getValueType() == VT)
14931 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14933 return SDValue();
14936 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14937 SDValue N0 = N->getOperand(0);
14938 EVT VT = N->getValueType(0);
14939 EVT OpVT = N0.getValueType();
14941 // [us]itofp(undef) = 0, because the result value is bounded.
14942 if (N0.isUndef())
14943 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14945 // fold (sint_to_fp c1) -> c1fp
14946 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14947 // ...but only if the target supports immediate floating-point values
14948 (!LegalOperations ||
14949 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14950 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14952 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14953 // but UINT_TO_FP is legal on this target, try to convert.
14954 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14955 hasOperation(ISD::UINT_TO_FP, OpVT)) {
14956 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14957 if (DAG.SignBitIsZero(N0))
14958 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14961 // The next optimizations are desirable only if SELECT_CC can be lowered.
14962 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
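// (The i1 setcc result is treated as a signed value, so 'true' == 1 is
// interpreted as -1 and converts to -1.0.)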
14963 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14964 !VT.isVector() &&
14965 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14966 SDLoc DL(N);
14967 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14968 DAG.getConstantFP(0.0, DL, VT));
14971 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14972 // (select (setcc x, y, cc), 1.0, 0.0)
14973 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14974 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14975 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14976 SDLoc DL(N);
14977 return DAG.getSelect(DL, VT, N0.getOperand(0),
14978 DAG.getConstantFP(1.0, DL, VT),
14979 DAG.getConstantFP(0.0, DL, VT));
14982 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14983 return FTrunc;
14985 return SDValue();
14988 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14989 SDValue N0 = N->getOperand(0);
14990 EVT VT = N->getValueType(0);
14991 EVT OpVT = N0.getValueType();
14993 // [us]itofp(undef) = 0, because the result value is bounded.
14994 if (N0.isUndef())
14995 return DAG.getConstantFP(0.0, SDLoc(N), VT);
14997 // fold (uint_to_fp c1) -> c1fp
14998 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14999 // ...but only if the target supports immediate floating-point values
15000 (!LegalOperations ||
15001 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15002 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15004 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15005 // but SINT_TO_FP is legal on this target, try to convert.
15006 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15007 hasOperation(ISD::SINT_TO_FP, OpVT)) {
15008 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15009 if (DAG.SignBitIsZero(N0))
15010 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15013 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15014 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15015 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15016 SDLoc DL(N);
15017 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15018 DAG.getConstantFP(0.0, DL, VT));
15021 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15022 return FTrunc;
15024 return SDValue();
15027 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15028 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15029 SDValue N0 = N->getOperand(0);
15030 EVT VT = N->getValueType(0);
15032 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15033 return SDValue();
15035 SDValue Src = N0.getOperand(0);
15036 EVT SrcVT = Src.getValueType();
15037 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15038 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15040 // We can safely assume the conversion won't overflow the output range,
15041 // because (for example) (uint8_t)18293.f is undefined behavior.
15043 // Since we can assume the conversion won't overflow, our decision as to
15044 // whether the input will fit in the float should depend on the minimum
15045 // of the input range and output range.
15047 // This means this is also safe for a signed input and unsigned output, since
15048 // a negative input would lead to undefined behavior.
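// For example, (fp_to_sint i32 (sint_to_fp f32 (i16 X))) has 15 input
// magnitude bits, 31 output magnitude bits, and f32 offers 24 bits of
// precision, so every reachable value round-trips exactly and the pair
// folds to (sign_extend i32 X).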
15049 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15050 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15051 unsigned ActualSize = std::min(InputSize, OutputSize);
15052 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15054 // We can only fold away the float conversion if the input range can be
15055 // represented exactly in the float range.
15056 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15057 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15058 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15059 : ISD::ZERO_EXTEND;
15060 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15062 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15063 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15064 return DAG.getBitcast(VT, Src);
15066 return SDValue();
15069 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15070 SDValue N0 = N->getOperand(0);
15071 EVT VT = N->getValueType(0);
15073 // fold (fp_to_sint undef) -> undef
15074 if (N0.isUndef())
15075 return DAG.getUNDEF(VT);
15077 // fold (fp_to_sint c1fp) -> c1
15078 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15079 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15081 return FoldIntToFPToInt(N, DAG);
15084 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15085 SDValue N0 = N->getOperand(0);
15086 EVT VT = N->getValueType(0);
15088 // fold (fp_to_uint undef) -> undef
15089 if (N0.isUndef())
15090 return DAG.getUNDEF(VT);
15092 // fold (fp_to_uint c1fp) -> c1
15093 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15094 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15096 return FoldIntToFPToInt(N, DAG);
15099 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15100 SDValue N0 = N->getOperand(0);
15101 SDValue N1 = N->getOperand(1);
15102 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15103 EVT VT = N->getValueType(0);
15105 // fold (fp_round c1fp) -> c1fp
15106 if (N0CFP)
15107 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15109 // fold (fp_round (fp_extend x)) -> x
15110 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15111 return N0.getOperand(0);
15113 // fold (fp_round (fp_round x)) -> (fp_round x)
15114 if (N0.getOpcode() == ISD::FP_ROUND) {
15115 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15116 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15118 // Skip this folding if it results in an fp_round from f80 to f16.
15120 // f80 to f16 always generates an expensive (and as yet, unimplemented)
15121 // libcall to __truncxfhf2 instead of selecting native f16 conversion
15122 // instructions from f32 or f64. Moreover, the first (value-preserving)
15123 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15124 // x86.
15125 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15126 return SDValue();
15128 // If the first fp_round isn't a value preserving truncation, it might
15129 // introduce a tie in the second fp_round, that wouldn't occur in the
15130 // single-step fp_round we want to fold to.
15131 // In other words, double rounding isn't the same as rounding.
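// (Classic double-rounding case: if the first, narrower rounding lands
// exactly on a halfway point of the final type, the second rounding can end
// up one ulp away from what a single direct rounding would have produced.)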
15132 // Also, this is a value preserving truncation iff both fp_round's are.
15133 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15134 SDLoc DL(N);
15135 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15136 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15140 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15141 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
15142 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15143 N0.getOperand(0), N1);
15144 AddToWorklist(Tmp.getNode());
15145 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15146 Tmp, N0.getOperand(1));
15149 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15150 return NewVSel;
15152 return SDValue();
15155 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15156 SDValue N0 = N->getOperand(0);
15157 EVT VT = N->getValueType(0);
15159 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
15160 if (N->hasOneUse() &&
15161 N->use_begin()->getOpcode() == ISD::FP_ROUND)
15162 return SDValue();
15164 // fold (fp_extend c1fp) -> c1fp
15165 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15166 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15168 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15169 if (N0.getOpcode() == ISD::FP16_TO_FP &&
15170 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15171 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15173 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15174 // value of X.
15175 if (N0.getOpcode() == ISD::FP_ROUND
15176 && N0.getConstantOperandVal(1) == 1) {
15177 SDValue In = N0.getOperand(0);
15178 if (In.getValueType() == VT) return In;
15179 if (VT.bitsLT(In.getValueType()))
15180 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15181 In, N0.getOperand(1));
15182 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15185 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15186 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15187 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15188 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15189 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15190 LN0->getChain(),
15191 LN0->getBasePtr(), N0.getValueType(),
15192 LN0->getMemOperand());
15193 CombineTo(N, ExtLoad);
15194 CombineTo(N0.getNode(),
15195 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15196 N0.getValueType(), ExtLoad,
15197 DAG.getIntPtrConstant(1, SDLoc(N0))),
15198 ExtLoad.getValue(1));
15199 return SDValue(N, 0); // Return N so it doesn't get rechecked!
15202 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15203 return NewVSel;
15205 return SDValue();
15208 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15209 SDValue N0 = N->getOperand(0);
15210 EVT VT = N->getValueType(0);
15212 // fold (fceil c1) -> fceil(c1)
15213 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15214 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15216 return SDValue();
15219 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15220 SDValue N0 = N->getOperand(0);
15221 EVT VT = N->getValueType(0);
15223 // fold (ftrunc c1) -> ftrunc(c1)
15224 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15225 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15227 // fold ftrunc (known rounded int x) -> x
15228 // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
15229 // likely to be generated to extract an integer from a rounded floating-point value.
15230 switch (N0.getOpcode()) {
15231 default: break;
15232 case ISD::FRINT:
15233 case ISD::FTRUNC:
15234 case ISD::FNEARBYINT:
15235 case ISD::FFLOOR:
15236 case ISD::FCEIL:
15237 return N0;
15240 return SDValue();
15243 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15244 SDValue N0 = N->getOperand(0);
15245 EVT VT = N->getValueType(0);
15247 // fold (ffloor c1) -> ffloor(c1)
15248 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15249 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15251 return SDValue();
15254 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15255 SDValue N0 = N->getOperand(0);
15256 EVT VT = N->getValueType(0);
15257 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15259 // Constant fold FNEG.
15260 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15261 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15263 if (SDValue NegN0 =
15264 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15265 return NegN0;
15267 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15268 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15269 // know it was called from a context with a nsz flag if the input fsub does
15270 // not.
15271 if (N0.getOpcode() == ISD::FSUB &&
15272 (DAG.getTarget().Options.NoSignedZerosFPMath ||
15273 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15274 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15275 N0.getOperand(0));
15278 if (SDValue Cast = foldSignChangeInBitcast(N))
15279 return Cast;
15281 return SDValue();
15284 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15285 SDValue N0 = N->getOperand(0);
15286 SDValue N1 = N->getOperand(1);
15287 EVT VT = N->getValueType(0);
15288 const SDNodeFlags Flags = N->getFlags();
15289 unsigned Opc = N->getOpcode();
15290 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15291 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15292 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15294 // Constant fold.
15295 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15296 return C;
15298 // Canonicalize to constant on RHS.
15299 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15300 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15301 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15303 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15304 const APFloat &AF = N1CFP->getValueAPF();
15306 // minnum(X, nan) -> X
15307 // maxnum(X, nan) -> X
15308 // minimum(X, nan) -> nan
15309 // maximum(X, nan) -> nan
15310 if (AF.isNaN())
15311 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15313 // In the following folds, inf can be replaced with the largest finite
15314 // float, if the ninf flag is set.
15315 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15316 // minnum(X, -inf) -> -inf
15317 // maxnum(X, +inf) -> +inf
15318 // minimum(X, -inf) -> -inf if nnan
15319 // maximum(X, +inf) -> +inf if nnan
15320 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15321 return N->getOperand(1);
15323 // minnum(X, +inf) -> X if nnan
15324 // maxnum(X, -inf) -> X if nnan
15325 // minimum(X, +inf) -> X
15326 // maximum(X, -inf) -> X
15327 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15328 return N->getOperand(0);
15332 return SDValue();
15335 SDValue DAGCombiner::visitFABS(SDNode *N) {
15336 SDValue N0 = N->getOperand(0);
15337 EVT VT = N->getValueType(0);
15339 // fold (fabs c1) -> fabs(c1)
15340 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15341 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15343 // fold (fabs (fabs x)) -> (fabs x)
15344 if (N0.getOpcode() == ISD::FABS)
15345 return N->getOperand(0);
15347 // fold (fabs (fneg x)) -> (fabs x)
15348 // fold (fabs (fcopysign x, y)) -> (fabs x)
15349 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15350 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15352 if (SDValue Cast = foldSignChangeInBitcast(N))
15353 return Cast;
15355 return SDValue();
15358 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15359 SDValue Chain = N->getOperand(0);
15360 SDValue N1 = N->getOperand(1);
15361 SDValue N2 = N->getOperand(2);
15363 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15364 // nondeterministic jumps).
15365 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15366 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15367 N1->getOperand(0), N2);
15370 // If N is a constant we could fold this into a fallthrough or unconditional
15371 // branch. However that doesn't happen very often in normal code, because
15372 // Instcombine/SimplifyCFG should have handled the available opportunities.
15373 // If we did this folding here, it would be necessary to update the
15374 // MachineBasicBlock CFG, which is awkward.
15376 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15377 // on the target.
15378 if (N1.getOpcode() == ISD::SETCC &&
15379 TLI.isOperationLegalOrCustom(ISD::BR_CC,
15380 N1.getOperand(0).getValueType())) {
15381 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15382 Chain, N1.getOperand(2),
15383 N1.getOperand(0), N1.getOperand(1), N2);
15386 if (N1.hasOneUse()) {
15387 // rebuildSetCC calls visitXor which may change the Chain when there is a
15388 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15389 HandleSDNode ChainHandle(Chain);
15390 if (SDValue NewN1 = rebuildSetCC(N1))
15391 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15392 ChainHandle.getValue(), NewN1, N2);
15395 return SDValue();
15398 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15399 if (N.getOpcode() == ISD::SRL ||
15400 (N.getOpcode() == ISD::TRUNCATE &&
15401 (N.getOperand(0).hasOneUse() &&
15402 N.getOperand(0).getOpcode() == ISD::SRL))) {
15403 // Look past the truncate.
15404 if (N.getOpcode() == ISD::TRUNCATE)
15405 N = N.getOperand(0);
15407 // Match this pattern so that we can generate simpler code:
15409 // %a = ...
15410 // %b = and i32 %a, 2
15411 // %c = srl i32 %b, 1
15412 // brcond i32 %c ...
15414 // into
15416 // %a = ...
15417 // %b = and i32 %a, 2
15418 // %c = setcc eq %b, 0
15419 // brcond %c ...
15421 // This applies only when the AND constant value has one bit set and the
15422 // SRL constant is equal to the log2 of the AND constant. The back-end is
15423 // smart enough to convert the result into a TEST/JMP sequence.
15424 SDValue Op0 = N.getOperand(0);
15425 SDValue Op1 = N.getOperand(1);
15427 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15428 SDValue AndOp1 = Op0.getOperand(1);
15430 if (AndOp1.getOpcode() == ISD::Constant) {
15431 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15433 if (AndConst.isPowerOf2() &&
15434 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15435 SDLoc DL(N);
15436 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15437 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15438 ISD::SETNE);
15444 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15445 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15446 if (N.getOpcode() == ISD::XOR) {
15447 // Because we may call this on a speculatively constructed
15448 // SimplifiedSetCC Node, we need to simplify this node first.
15449 // Ideally this should be folded into SimplifySetCC and not
15450 // here. For now, grab a handle to N so we don't lose it from
15451 // replacements internal to the visit.
15452 HandleSDNode XORHandle(N);
15453 while (N.getOpcode() == ISD::XOR) {
15454 SDValue Tmp = visitXOR(N.getNode());
15455 // No simplification done.
15456 if (!Tmp.getNode())
15457 break;
15458 // Returning N is a form of in-visit replacement that may invalidate
15459 // N. Grab the value from the handle.
15460 if (Tmp.getNode() == N.getNode())
15461 N = XORHandle.getValue();
15462 else // Node simplified. Try simplifying again.
15463 N = Tmp;
15466 if (N.getOpcode() != ISD::XOR)
15467 return N;
15469 SDValue Op0 = N->getOperand(0);
15470 SDValue Op1 = N->getOperand(1);
15472 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15473 bool Equal = false;
15474 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15475 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15476 Op0.getValueType() == MVT::i1) {
15477 N = Op0;
15478 Op0 = N->getOperand(0);
15479 Op1 = N->getOperand(1);
15480 Equal = true;
15483 EVT SetCCVT = N.getValueType();
15484 if (LegalTypes)
15485 SetCCVT = getSetCCResultType(SetCCVT);
15486 // Replace the uses of XOR with SETCC
15487 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15488 Equal ? ISD::SETEQ : ISD::SETNE);
15492 return SDValue();
15495 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15497 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15498 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15499 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15501 // If N is a constant we could fold this into a fallthrough or unconditional
15502 // branch. However that doesn't happen very often in normal code, because
15503 // Instcombine/SimplifyCFG should have handled the available opportunities.
15504 // If we did this folding here, it would be necessary to update the
15505 // MachineBasicBlock CFG, which is awkward.
15507 // Use SimplifySetCC to simplify SETCC's.
15508 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15509 CondLHS, CondRHS, CC->get(), SDLoc(N),
15510 false);
15511 if (Simp.getNode()) AddToWorklist(Simp.getNode());
15513 // fold to a simpler setcc
15514 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15515 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15516 N->getOperand(0), Simp.getOperand(2),
15517 Simp.getOperand(0), Simp.getOperand(1),
15518 N->getOperand(4));
15520 return SDValue();
15523 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15524 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15525 const TargetLowering &TLI) {
15526 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15527 if (LD->isIndexed())
15528 return false;
15529 EVT VT = LD->getMemoryVT();
15530 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15531 return false;
15532 Ptr = LD->getBasePtr();
15533 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15534 if (ST->isIndexed())
15535 return false;
15536 EVT VT = ST->getMemoryVT();
15537 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15538 return false;
15539 Ptr = ST->getBasePtr();
15540 IsLoad = false;
15541 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15542 if (LD->isIndexed())
15543 return false;
15544 EVT VT = LD->getMemoryVT();
15545 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15546 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15547 return false;
15548 Ptr = LD->getBasePtr();
15549 IsMasked = true;
15550 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15551 if (ST->isIndexed())
15552 return false;
15553 EVT VT = ST->getMemoryVT();
15554 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15555 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15556 return false;
15557 Ptr = ST->getBasePtr();
15558 IsLoad = false;
15559 IsMasked = true;
15560 } else {
15561 return false;
15563 return true;
15566 /// Try turning a load/store into a pre-indexed load/store when the base
15567 /// pointer is an add or subtract and it has other uses besides the load/store.
15568 /// After the transformation, the new indexed load/store has effectively folded
15569 /// the add/subtract in and all of its other uses are redirected to the
15570 /// new load/store.
15571 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15572 if (Level < AfterLegalizeDAG)
15573 return false;
15575 bool IsLoad = true;
15576 bool IsMasked = false;
15577 SDValue Ptr;
15578 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15579 Ptr, TLI))
15580 return false;
15582 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15583 // out. There is no reason to make this a preinc/predec.
15584 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15585 Ptr.getNode()->hasOneUse())
15586 return false;
15588 // Ask the target to do addressing mode selection.
15589 SDValue BasePtr;
15590 SDValue Offset;
15591 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15592 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15593 return false;
15595 // Backends without true r+i pre-indexed forms may need to pass a
15596 // constant base with a variable offset so that constant coercion
15597 // will work with the patterns in canonical form.
15598 bool Swapped = false;
15599 if (isa<ConstantSDNode>(BasePtr)) {
15600 std::swap(BasePtr, Offset);
15601 Swapped = true;
15604 // Don't create an indexed load / store with zero offset.
15605 if (isNullConstant(Offset))
15606 return false;
15608 // Try turning it into a pre-indexed load / store except when:
15609 // 1) The new base ptr is a frame index.
15610 // 2) If N is a store and the new base ptr is either the same as or is a
15611 // predecessor of the value being stored.
15612 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15613 // that would create a cycle.
15614 // 4) All uses are load / store ops that use it as old base ptr.
15616 // Check #1. Preinc'ing a frame index would require copying the stack pointer
15617 // (plus the implicit offset) to a register to preinc anyway.
15618 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15619 return false;
15621 // Check #2.
15622 if (!IsLoad) {
15623 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15624 : cast<StoreSDNode>(N)->getValue();
15626 // Would require a copy.
15627 if (Val == BasePtr)
15628 return false;
15630 // Would create a cycle.
15631 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15632 return false;
15635 // Caches for hasPredecessorHelper.
15636 SmallPtrSet<const SDNode *, 32> Visited;
15637 SmallVector<const SDNode *, 16> Worklist;
15638 Worklist.push_back(N);
15640 // If the offset is a constant, there may be other adds of constants that
15641 // can be folded with this one. We should do this to avoid having to keep
15642 // a copy of the original base pointer.
15643 SmallVector<SDNode *, 16> OtherUses;
15644 if (isa<ConstantSDNode>(Offset))
15645 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15646 UE = BasePtr.getNode()->use_end();
15647 UI != UE; ++UI) {
15648 SDUse &Use = UI.getUse();
15649 // Skip the use that is Ptr and uses of other results from BasePtr's
15650 // node (important for nodes that return multiple results).
15651 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15652 continue;
15654 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15655 continue;
15657 if (Use.getUser()->getOpcode() != ISD::ADD &&
15658 Use.getUser()->getOpcode() != ISD::SUB) {
15659 OtherUses.clear();
15660 break;
15663 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15664 if (!isa<ConstantSDNode>(Op1)) {
15665 OtherUses.clear();
15666 break;
15669 // FIXME: In some cases, we can be smarter about this.
15670 if (Op1.getValueType() != Offset.getValueType()) {
15671 OtherUses.clear();
15672 break;
15675 OtherUses.push_back(Use.getUser());
15678 if (Swapped)
15679 std::swap(BasePtr, Offset);
15681 // Now check for #3 and #4.
15682 bool RealUse = false;
15684 for (SDNode *Use : Ptr.getNode()->uses()) {
15685 if (Use == N)
15686 continue;
15687 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15688 return false;
15690 // If Ptr may be folded into the addressing mode of another use, then it's
15691 // not profitable to do this transformation.
15692 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15693 RealUse = true;
15696 if (!RealUse)
15697 return false;
15699 SDValue Result;
15700 if (!IsMasked) {
15701 if (IsLoad)
15702 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15703 else
15704 Result =
15705 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15706 } else {
15707 if (IsLoad)
15708 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15709 Offset, AM);
15710 else
15711 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15712 Offset, AM);
15714 ++PreIndexedNodes;
15715 ++NodesCombined;
15716 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15717 Result.getNode()->dump(&DAG); dbgs() << '\n');
15718 WorklistRemover DeadNodes(*this);
15719 if (IsLoad) {
15720 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15721 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15722 } else {
15723 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15726 // Finally, since the node is now dead, remove it from the graph.
15727 deleteAndRecombine(N);
15729 if (Swapped)
15730 std::swap(BasePtr, Offset);
15732 // Replace other uses of BasePtr that can be updated to use Ptr
15733 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15734 unsigned OffsetIdx = 1;
15735 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15736 OffsetIdx = 0;
15737 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15738 BasePtr.getNode() && "Expected BasePtr operand");
15740 // We need to replace ptr0 in the following expression:
15741 // x0 * offset0 + y0 * ptr0 = t0
15742 // knowing that
15743 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15745 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15746 // indexed load/store and the expression that needs to be re-written.
15748 // Therefore, we have:
15749 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
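// For instance, with a pre-increment form (x1 = y1 = 1) whose offset1 is 4
// and another user computing (add ptr0, 8) (x0 = y0 = 1), this rewrites the
// other user to (add t1, 4), since t0 = ptr0 + 8 = (t1 - 4) + 8 = t1 + 4.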
15751 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15752 const APInt &Offset0 = CN->getAPIntValue();
15753 const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15754 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15755 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15756 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15757 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15759 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15761 APInt CNV = Offset0;
15762 if (X0 < 0) CNV = -CNV;
15763 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15764 else CNV = CNV - Offset1;
15766 SDLoc DL(OtherUses[i]);
15768 // We can now generate the new expression.
15769 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15770 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15772 SDValue NewUse = DAG.getNode(Opcode,
15773 DL,
15774 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15775 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15776 deleteAndRecombine(OtherUses[i]);
15779 // Replace the uses of Ptr with uses of the updated base value.
15780 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15781 deleteAndRecombine(Ptr.getNode());
15782 AddToWorklist(Result.getNode());
15784 return true;
15787 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15788 SDValue &BasePtr, SDValue &Offset,
15789 ISD::MemIndexedMode &AM,
15790 SelectionDAG &DAG,
15791 const TargetLowering &TLI) {
15792 if (PtrUse == N ||
15793 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15794 return false;
15796 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15797 return false;
15799 // Don't create an indexed load / store with zero offset.
15800 if (isNullConstant(Offset))
15801 return false;
15803 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15804 return false;
15806 SmallPtrSet<const SDNode *, 32> Visited;
15807 for (SDNode *Use : BasePtr.getNode()->uses()) {
15808 if (Use == Ptr.getNode())
15809 continue;
15811 // Bail out if there's a later user which could perform the indexing instead.
15812 if (isa<MemSDNode>(Use)) {
15813 bool IsLoad = true;
15814 bool IsMasked = false;
15815 SDValue OtherPtr;
15816 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15817 IsMasked, OtherPtr, TLI)) {
15818 SmallVector<const SDNode *, 2> Worklist;
15819 Worklist.push_back(Use);
15820 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15821 return false;
15825 // If all the uses are load / store addresses, then don't do the
15826 // transformation.
15827 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15828 for (SDNode *UseUse : Use->uses())
15829 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15830 return false;
15833 return true;
15836 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15837 bool &IsMasked, SDValue &Ptr,
15838 SDValue &BasePtr, SDValue &Offset,
15839 ISD::MemIndexedMode &AM,
15840 SelectionDAG &DAG,
15841 const TargetLowering &TLI) {
15842 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15843 IsMasked, Ptr, TLI) ||
15844 Ptr.getNode()->hasOneUse())
15845 return nullptr;
15847 // Try turning it into a post-indexed load / store except when
15848 // 1) All uses are load / store ops that use it as base ptr (and
15849 // it may be folded into the addressing mode).
15850 // 2) Op must be independent of N, i.e. Op is neither a predecessor
15851 // nor a successor of N. Otherwise, if Op is folded that would
15852 // create a cycle.
15853 for (SDNode *Op : Ptr->uses()) {
15854 // Check for #1.
15855 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15856 continue;
15858 // Check for #2.
15859 SmallPtrSet<const SDNode *, 32> Visited;
15860 SmallVector<const SDNode *, 8> Worklist;
15861 // Ptr is predecessor to both N and Op.
15862 Visited.insert(Ptr.getNode());
15863 Worklist.push_back(N);
15864 Worklist.push_back(Op);
15865 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15866 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15867 return Op;
15869 return nullptr;
15872 /// Try to combine a load/store with an add/sub of the base pointer node into a
15873 /// post-indexed load/store. The transformation effectively folds the
15874 /// add/subtract into the new indexed load/store, and all of its uses are
15875 /// redirected to the new load/store.
15876 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15877 if (Level < AfterLegalizeDAG)
15878 return false;
15880 bool IsLoad = true;
15881 bool IsMasked = false;
15882 SDValue Ptr;
15883 SDValue BasePtr;
15884 SDValue Offset;
15885 ISD::MemIndexedMode AM = ISD::UNINDEXED;
15886 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15887 Offset, AM, DAG, TLI);
15888 if (!Op)
15889 return false;
15891 SDValue Result;
15892 if (!IsMasked)
15893 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15894 Offset, AM)
15895 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15896 BasePtr, Offset, AM);
15897 else
15898 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15899 BasePtr, Offset, AM)
15900 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15901 BasePtr, Offset, AM);
15902 ++PostIndexedNodes;
15903 ++NodesCombined;
15904 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15905 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15906 dbgs() << '\n');
15907 WorklistRemover DeadNodes(*this);
15908 if (IsLoad) {
15909 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15910 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15911 } else {
15912 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15915 // Finally, since the node is now dead, remove it from the graph.
15916 deleteAndRecombine(N);
15918 // Replace the uses of Op with uses of the updated base value.
15919 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15920 Result.getValue(IsLoad ? 1 : 0));
15921 deleteAndRecombine(Op);
15922 return true;
15925 /// Return the base-pointer arithmetic from an indexed \p LD.
15926 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15927 ISD::MemIndexedMode AM = LD->getAddressingMode();
15928 assert(AM != ISD::UNINDEXED);
15929 SDValue BP = LD->getOperand(1);
15930 SDValue Inc = LD->getOperand(2);
15932 // Some backends use TargetConstants for load offsets, but don't expect
15933 // TargetConstants in general ADD nodes. We can convert these constants into
15934 // regular Constants (if the constant is not opaque).
15935 assert((Inc.getOpcode() != ISD::TargetConstant ||
15936 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15937 "Cannot split out indexing using opaque target constants");
15938 if (Inc.getOpcode() == ISD::TargetConstant) {
15939 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15940 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15941 ConstInc->getValueType(0));
15944 unsigned Opc =
15945 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15946 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15949 static inline ElementCount numVectorEltsOrZero(EVT T) {
15950 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15953 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15954 Val = ST->getValue();
15955 EVT STType = Val.getValueType();
15956 EVT STMemType = ST->getMemoryVT();
15957 if (STType == STMemType)
15958 return true;
15959 if (isTypeLegal(STMemType))
15960 return false; // fail.
15961 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15962 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15963 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15964 return true;
15966 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15967 STType.isInteger() && STMemType.isInteger()) {
15968 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15969 return true;
15971 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15972 Val = DAG.getBitcast(STMemType, Val);
15973 return true;
15975 return false; // fail.
15978 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15979 EVT LDMemType = LD->getMemoryVT();
15980 EVT LDType = LD->getValueType(0);
15981 assert(Val.getValueType() == LDMemType &&
15982 "Attempting to extend value of non-matching type");
15983 if (LDType == LDMemType)
15984 return true;
15985 if (LDMemType.isInteger() && LDType.isInteger()) {
15986 switch (LD->getExtensionType()) {
15987 case ISD::NON_EXTLOAD:
15988 Val = DAG.getBitcast(LDType, Val);
15989 return true;
15990 case ISD::EXTLOAD:
15991 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15992 return true;
15993 case ISD::SEXTLOAD:
15994 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15995 return true;
15996 case ISD::ZEXTLOAD:
15997 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15998 return true;
16001 return false;
16004 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16005 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16006 return SDValue();
16007 SDValue Chain = LD->getOperand(0);
16008 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16009 // TODO: Relax this restriction for unordered atomics (see D66309)
16010 if (!ST || !ST->isSimple())
16011 return SDValue();
16013 EVT LDType = LD->getValueType(0);
16014 EVT LDMemType = LD->getMemoryVT();
16015 EVT STMemType = ST->getMemoryVT();
16016 EVT STType = ST->getValue().getValueType();
16018 // There are two cases to consider here:
16019 // 1. The store is fixed width and the load is scalable. In this case we
16020 // don't know at compile time if the store completely envelops the load
16021 // so we abandon the optimisation.
16022 // 2. The store is scalable and the load is fixed width. We could
16023 // potentially support a limited number of cases here, but there has been
16024 // no cost-benefit analysis to prove it's worth it.
16025 bool LdStScalable = LDMemType.isScalableVector();
16026 if (LdStScalable != STMemType.isScalableVector())
16027 return SDValue();
16029 // If we are dealing with scalable vectors on a big endian platform the
16030 // calculation of offsets below becomes trickier, since we do not know at
16031 // compile time the absolute size of the vector. Until we've done more
16032 // analysis on big-endian platforms it seems better to bail out for now.
16033 if (LdStScalable && DAG.getDataLayout().isBigEndian())
16034 return SDValue();
16036 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16037 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16038 int64_t Offset;
16039 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16040 return SDValue();
16042 // Normalize for endianness. After this, Offset=0 will denote that the least
16043 // significant bit in the loaded value maps to the least significant bit in
16044 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
16045 // n-th least significant byte of the stored value.
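// For example, a 32-bit store followed by a 16-bit load 2 bytes into it on a
// big-endian target has a raw offset of 2, which normalizes to
// (32 - 16) / 8 - 2 == 0, i.e. the load reads the least significant half of
// the stored value.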
16046 if (DAG.getDataLayout().isBigEndian())
16047 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16048 (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16049 8 -
16050 Offset;
16052 // Check that the stored value covers all bits that are loaded.
16053 bool STCoversLD;
16055 TypeSize LdMemSize = LDMemType.getSizeInBits();
16056 TypeSize StMemSize = STMemType.getSizeInBits();
16057 if (LdStScalable)
16058 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16059 else
16060 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16061 StMemSize.getFixedSize());
16063 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16064 if (LD->isIndexed()) {
16065 // Cannot handle opaque target constants and we must respect the user's
16066 // request not to split indexes from loads.
16067 if (!canSplitIdx(LD))
16068 return SDValue();
16069 SDValue Idx = SplitIndexingFromLoad(LD);
16070 SDValue Ops[] = {Val, Idx, Chain};
16071 return CombineTo(LD, Ops, 3);
16073 return CombineTo(LD, Val, Chain);
16076 if (!STCoversLD)
16077 return SDValue();
16079 // Memory as copy space (potentially masked).
16080 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16081 // Simple case: Direct non-truncating forwarding
16082 if (LDType.getSizeInBits() == LdMemSize)
16083 return ReplaceLd(LD, ST->getValue(), Chain);
16084 // Can we model the truncate and extension with an and mask?
16085 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16086 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16087 // Mask to size of LDMemType
16088 auto Mask =
16089 DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16090 StMemSize.getFixedSize()),
16091 SDLoc(ST), STType);
16092 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16093 return ReplaceLd(LD, Val, Chain);
16097 // TODO: Deal with nonzero offset.
16098 if (LD->getBasePtr().isUndef() || Offset != 0)
16099 return SDValue();
16100 // Model necessary truncations / extensions.
16101 SDValue Val;
16102 // Truncate Value To Stored Memory Size.
16103 do {
16104 if (!getTruncatedStoreValue(ST, Val))
16105 continue;
16106 if (!isTypeLegal(LDMemType))
16107 continue;
16108 if (STMemType != LDMemType) {
16109 // TODO: Support vectors? This requires extract_subvector/bitcast.
16110 if (!STMemType.isVector() && !LDMemType.isVector() &&
16111 STMemType.isInteger() && LDMemType.isInteger())
16112 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16113 else
16114 continue;
16116 if (!extendLoadedValueToExtension(LD, Val))
16117 continue;
16118 return ReplaceLd(LD, Val, Chain);
16119 } while (false);
16121 // On failure, cleanup dead nodes we may have created.
16122 if (Val->use_empty())
16123 deleteAndRecombine(Val.getNode());
16124 return SDValue();
16127 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16128 LoadSDNode *LD = cast<LoadSDNode>(N);
16129 SDValue Chain = LD->getChain();
16130 SDValue Ptr = LD->getBasePtr();
16132 // If load is not volatile and there are no uses of the loaded value (and
16133 // the updated indexed value in case of indexed loads), change uses of the
16134 // chain value into uses of the chain input (i.e. delete the dead load).
16135 // TODO: Allow this for unordered atomics (see D66309)
16136 if (LD->isSimple()) {
16137 if (N->getValueType(1) == MVT::Other) {
16138 // Unindexed loads.
16139 if (!N->hasAnyUseOfValue(0)) {
16140 // It's not safe to use the two value CombineTo variant here. e.g.
16141 // v1, chain2 = load chain1, loc
16142 // v2, chain3 = load chain2, loc
16143 // v3 = add v2, c
16144 // Now we replace use of chain2 with chain1. This makes the second load
16145 // isomorphic to the one we are deleting, and thus makes this load live.
16146 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16147 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
16148 dbgs() << "\n");
16149 WorklistRemover DeadNodes(*this);
16150 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16151 AddUsersToWorklist(Chain.getNode());
16152 if (N->use_empty())
16153 deleteAndRecombine(N);
16155 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16157 } else {
16158 // Indexed loads.
16159 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16161 // If this load has an opaque TargetConstant offset, then we cannot split
16162 // the indexing into an add/sub directly (that TargetConstant may not be
16163 // valid for a different type of node, and we cannot convert an opaque
16164 // target constant into a regular constant).
16165 bool CanSplitIdx = canSplitIdx(LD);
16167 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16168 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16169 SDValue Index;
16170 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16171 Index = SplitIndexingFromLoad(LD);
16172 // Try to fold the base pointer arithmetic into subsequent loads and
16173 // stores.
16174 AddUsersToWorklist(N);
16175 } else
16176 Index = DAG.getUNDEF(N->getValueType(1));
16177 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16178 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
16179 dbgs() << " and 2 other values\n");
16180 WorklistRemover DeadNodes(*this);
16181 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16182 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16183 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16184 deleteAndRecombine(N);
16185 return SDValue(N, 0); // Return N so it doesn't get rechecked!
16190 // If this load is directly stored, replace the load value with the stored
16191 // value.
16192 if (auto V = ForwardStoreValueToDirectLoad(LD))
16193 return V;
16195 // Try to infer better alignment information than the load already has.
16196 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16197 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16198 if (*Alignment > LD->getAlign() &&
16199 isAligned(*Alignment, LD->getSrcValueOffset())) {
16200 SDValue NewLoad = DAG.getExtLoad(
16201 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16202 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16203 LD->getMemOperand()->getFlags(), LD->getAAInfo());
16204 // NewLoad will always be N as we are only refining the alignment
16205 assert(NewLoad.getNode() == N);
16206 (void)NewLoad;
16211 if (LD->isUnindexed()) {
16212 // Walk up chain skipping non-aliasing memory nodes.
16213 SDValue BetterChain = FindBetterChain(LD, Chain);
16215 // If there is a better chain.
16216 if (Chain != BetterChain) {
16217 SDValue ReplLoad;
16219 // Replace the chain to avoid the dependency.
16220 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16221 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16222 BetterChain, Ptr, LD->getMemOperand());
16223 } else {
16224 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16225 LD->getValueType(0),
16226 BetterChain, Ptr, LD->getMemoryVT(),
16227 LD->getMemOperand());
16230 // Create token factor to keep old chain connected.
16231 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16232 MVT::Other, Chain, ReplLoad.getValue(1));
16234 // Replace uses with load result and token factor
16235 return CombineTo(N, ReplLoad.getValue(0), Token);
16239 // Try transforming N to an indexed load.
16240 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16241 return SDValue(N, 0);
16243 // Try to slice up N to more direct loads if the slices are mapped to
16244 // different register banks or pairing can take place.
16245 if (SliceUpLoad(N))
16246 return SDValue(N, 0);
16248 return SDValue();
16251 namespace {
16253 /// Helper structure used to slice a load in smaller loads.
16254 /// Basically a slice is obtained from the following sequence:
16255 /// Origin = load Ty1, Base
16256 /// Shift = srl Ty1 Origin, CstTy Amount
16257 /// Inst = trunc Shift to Ty2
16259 /// Then, it will be rewritten into:
16260 /// Slice = load SliceTy, Base + SliceOffset
16261 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16263 /// SliceTy is deduced from the number of bits that are actually used to
16264 /// build Inst.
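/// For example, on a little-endian target:
///   Origin = load i32 %base
///   Shift  = srl i32 Origin, 16
///   Inst   = trunc Shift to i8
/// uses only bits [16, 24) of the loaded value, so the slice becomes
///   Slice = load i8, %base + 2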
16265 struct LoadedSlice {
16266 /// Helper structure used to compute the cost of a slice.
16267 struct Cost {
16268 /// Are we optimizing for code size.
16269 bool ForCodeSize = false;
16271 /// Various costs.
16272 unsigned Loads = 0;
16273 unsigned Truncates = 0;
16274 unsigned CrossRegisterBanksCopies = 0;
16275 unsigned ZExts = 0;
16276 unsigned Shift = 0;
16278 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16280 /// Get the cost of one isolated slice.
16281 Cost(const LoadedSlice &LS, bool ForCodeSize)
16282 : ForCodeSize(ForCodeSize), Loads(1) {
16283 EVT TruncType = LS.Inst->getValueType(0);
16284 EVT LoadedType = LS.getLoadedType();
16285 if (TruncType != LoadedType &&
16286 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16287 ZExts = 1;
16290 /// Account for slicing gain in the current cost.
16291 /// Slicing provides a few gains, like removing a shift or a
16292 /// truncate. This method allows the cost of the original
16293 /// load to grow by the gain from this slice.
16294 void addSliceGain(const LoadedSlice &LS) {
16295 // Each slice saves a truncate.
16296 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16297 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16298 LS.Inst->getValueType(0)))
16299 ++Truncates;
16300 // If there is a shift amount, this slice gets rid of it.
16301 if (LS.Shift)
16302 ++Shift;
16303 // If this slice can merge a cross register bank copy, account for it.
16304 if (LS.canMergeExpensiveCrossRegisterBankCopy())
16305 ++CrossRegisterBanksCopies;
16308 Cost &operator+=(const Cost &RHS) {
16309 Loads += RHS.Loads;
16310 Truncates += RHS.Truncates;
16311 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16312 ZExts += RHS.ZExts;
16313 Shift += RHS.Shift;
16314 return *this;
16317 bool operator==(const Cost &RHS) const {
16318 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16319 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16320 ZExts == RHS.ZExts && Shift == RHS.Shift;
16323 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16325 bool operator<(const Cost &RHS) const {
16326 // Assume cross register banks copies are as expensive as loads.
16327 // FIXME: Do we want some more target hooks?
16328 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16329 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16330 // Unless we are optimizing for code size, consider the
16331 // expensive operation first.
16332 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16333 return ExpensiveOpsLHS < ExpensiveOpsRHS;
16334 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16335 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16338 bool operator>(const Cost &RHS) const { return RHS < *this; }
16340 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16342 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16345 // The last instruction that represents the slice. This should be a
16346 // truncate instruction.
16347 SDNode *Inst;
16349 // The original load instruction.
16350 LoadSDNode *Origin;
16352 // The right shift amount in bits from the original load.
16353 unsigned Shift;
16355 // The DAG from which Origin came.
16356 // This is used to get some contextual information about legal types, etc.
16357 SelectionDAG *DAG;
16359 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16360 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16361 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16363 /// Get the bits used in a chunk of bits \p BitWidth large.
16364 /// \return Result is \p BitWidth bits wide and has used bits set to 1 and
16365 /// unused bits set to 0.
16366 APInt getUsedBits() const {
16367 // Reproduce the trunc(lshr) sequence:
16368 // - Start from the truncated value.
16369 // - Zero extend to the desired bit width.
16370 // - Shift left.
16371 assert(Origin && "No original load to compare against.");
16372 unsigned BitWidth = Origin->getValueSizeInBits(0);
16373 assert(Inst && "This slice is not bound to an instruction");
16374 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16375 "Extracted slice is bigger than the whole type!");
16376 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16377 UsedBits.setAllBits();
16378 UsedBits = UsedBits.zext(BitWidth);
16379 UsedBits <<= Shift;
16380 return UsedBits;
16383 /// Get the size of the slice to be loaded in bytes.
16384 unsigned getLoadedSize() const {
16385 unsigned SliceSize = getUsedBits().countPopulation();
16386 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16387 return SliceSize / 8;
16390 /// Get the type that will be loaded for this slice.
16391 /// Note: This may not be the final type for the slice.
16392 EVT getLoadedType() const {
16393 assert(DAG && "Missing context");
16394 LLVMContext &Ctxt = *DAG->getContext();
16395 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16398 /// Get the alignment of the load used for this slice.
16399 Align getAlign() const {
16400 Align Alignment = Origin->getAlign();
16401 uint64_t Offset = getOffsetFromBase();
16402 if (Offset != 0)
16403 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16404 return Alignment;
16407 /// Check if this slice can be rewritten with legal operations.
16408 bool isLegal() const {
16409 // An invalid slice is not legal.
16410 if (!Origin || !Inst || !DAG)
16411 return false;
16413 // Offsets are only used for indexed loads; we do not handle that.
16414 if (!Origin->getOffset().isUndef())
16415 return false;
16417 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16419 // Check that the type is legal.
16420 EVT SliceType = getLoadedType();
16421 if (!TLI.isTypeLegal(SliceType))
16422 return false;
16424 // Check that the load is legal for this type.
16425 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16426 return false;
16428 // Check that the offset can be computed.
16429 // 1. Check its type.
16430 EVT PtrType = Origin->getBasePtr().getValueType();
16431 if (PtrType == MVT::Untyped || PtrType.isExtended())
16432 return false;
16434 // 2. Check that it fits in the immediate.
16435 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16436 return false;
16438 // 3. Check that the computation is legal.
16439 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16440 return false;
16442 // Check that the zext is legal if it needs one.
16443 EVT TruncateType = Inst->getValueType(0);
16444 if (TruncateType != SliceType &&
16445 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16446 return false;
16448 return true;
16451 /// Get the offset in bytes of this slice in the original chunk of
16452 /// bits.
16453 /// \pre DAG != nullptr.
16454 uint64_t getOffsetFromBase() const {
16455 assert(DAG && "Missing context.");
16456 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16457 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16458 uint64_t Offset = Shift / 8;
16459 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16460 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16461 "The size of the original loaded type is not a multiple of a"
16462 " byte.");
16463 // If Offset is bigger than TySizeInBytes, it means we are loading all
16464 // zeros. This should have been optimized earlier in the process.
16465 assert(TySizeInBytes > Offset &&
16466 "Invalid shift amount for given loaded size");
16467 if (IsBigEndian)
16468 Offset = TySizeInBytes - Offset - getLoadedSize();
16469 return Offset;
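// Worked example (illustrative, i32 original load): with Shift == 16 the
// byte offset is 2 on a little-endian target; on a big-endian target the
// same 2-byte slice starts at 4 - 2 - getLoadedSize() == 0.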
16472 /// Generate the sequence of instructions to load the slice
16473 /// represented by this object and redirect the uses of this slice to
16474 /// this new sequence of instructions.
16475 /// \pre this->Inst && this->Origin are valid Instructions and this
16476 /// object passed the legal check: LoadedSlice::isLegal returned true.
16477 /// \return The last instruction of the sequence used to load the slice.
16478 SDValue loadSlice() const {
16479 assert(Inst && Origin && "Unable to replace a non-existing slice.");
16480 const SDValue &OldBaseAddr = Origin->getBasePtr();
16481 SDValue BaseAddr = OldBaseAddr;
16482 // Get the offset in that chunk of bytes w.r.t. the endianness.
16483 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16484 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16485 if (Offset) {
16486 // BaseAddr = BaseAddr + Offset.
16487 EVT ArithType = BaseAddr.getValueType();
16488 SDLoc DL(Origin);
16489 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16490 DAG->getConstant(Offset, DL, ArithType));
16493 // Create the type of the loaded slice according to its size.
16494 EVT SliceType = getLoadedType();
16496 // Create the load for the slice.
16497 SDValue LastInst =
16498 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16499 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16500 Origin->getMemOperand()->getFlags());
16501 // If the final type is not the same as the loaded type, this means that
16502 // we have to pad with zero. Create a zero extend for that.
16503 EVT FinalType = Inst->getValueType(0);
16504 if (SliceType != FinalType)
16505 LastInst =
16506 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16507 return LastInst;
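// Sketch of the rewrite performed here (little-endian, illustrative types):
//   x = load i32 @p
//   s = trunc (srl x, 16) to i16
// becomes
//   s = load i16 @(p + 2)
// followed by a zero extend when the user expects a wider type.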
16510 /// Check if this slice can be merged with an expensive cross register
16511 /// bank copy. E.g.,
16512 /// i = load i32
16513 /// f = bitcast i32 i to float
16514 bool canMergeExpensiveCrossRegisterBankCopy() const {
16515 if (!Inst || !Inst->hasOneUse())
16516 return false;
16517 SDNode *Use = *Inst->use_begin();
16518 if (Use->getOpcode() != ISD::BITCAST)
16519 return false;
16520 assert(DAG && "Missing context");
16521 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16522 EVT ResVT = Use->getValueType(0);
16523 const TargetRegisterClass *ResRC =
16524 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16525 const TargetRegisterClass *ArgRC =
16526 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16527 Use->getOperand(0)->isDivergent());
16528 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16529 return false;
16531 // At this point, we know that we perform a cross-register-bank copy.
16532 // Check if it is expensive.
16533 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16534 // Assume bitcasts are cheap unless the two register classes do not
16535 // explicitly share a common subclass.
16536 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16537 return false;
16539 // Check if it will be merged with the load.
16540 // 1. Check the alignment / fast memory access constraint.
16541 bool IsFast = false;
16542 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16543 Origin->getAddressSpace(), getAlign(),
16544 Origin->getMemOperand()->getFlags(), &IsFast) ||
16545 !IsFast)
16546 return false;
16548 // 2. Check that the load is a legal operation for that type.
16549 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16550 return false;
16552 // 3. Check that we do not have a zext in the way.
16553 if (Inst->getValueType(0) != getLoadedType())
16554 return false;
16556 return true;
16560 } // end anonymous namespace
16562 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16563 /// \p UsedBits looks like 0..0 1..1 0..0.
16564 static bool areUsedBitsDense(const APInt &UsedBits) {
16565 // If all the bits are one, this is dense!
16566 if (UsedBits.isAllOnes())
16567 return true;
16569 // Get rid of the unused bits on the right.
16570 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16571 // Get rid of the unused bits on the left.
16572 if (NarrowedUsedBits.countLeadingZeros())
16573 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16574 // Check that the chunk of bits is completely used.
16575 return NarrowedUsedBits.isAllOnes();
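// For example, 0x00FF0000 is dense (a single run of ones), whereas
// 0x00FF00FF is not, because its two runs of ones are separated by unused
// bits.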
16578 /// Check whether or not \p First and \p Second are next to each other
16579 /// in memory. This means that there is no hole between the bits loaded
16580 /// by \p First and the bits loaded by \p Second.
16581 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16582 const LoadedSlice &Second) {
16583 assert(First.Origin == Second.Origin && First.Origin &&
16584 "Unable to match different memory origins.");
16585 APInt UsedBits = First.getUsedBits();
16586 assert((UsedBits & Second.getUsedBits()) == 0 &&
16587 "Slices are not supposed to overlap.");
16588 UsedBits |= Second.getUsedBits();
16589 return areUsedBitsDense(UsedBits);
16592 /// Adjust the \p GlobalLSCost according to the target
16593 /// pairing capabilities and the layout of the slices.
16594 /// \pre \p GlobalLSCost should account for at least as many loads as
16595 /// there are in the slices in \p LoadedSlices.
16596 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16597 LoadedSlice::Cost &GlobalLSCost) {
16598 unsigned NumberOfSlices = LoadedSlices.size();
16599 // If there are fewer than 2 elements, no pairing is possible.
16600 if (NumberOfSlices < 2)
16601 return;
16603 // Sort the slices so that elements that are likely to be next to each
16604 // other in memory are next to each other in the list.
16605 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16606 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16607 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16609 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16610 // First (resp. Second) is the first (resp. second) potential candidate
16611 // to be placed in a paired load.
16612 const LoadedSlice *First = nullptr;
16613 const LoadedSlice *Second = nullptr;
16614 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16615 // Set the beginning of the pair.
16616 First = Second) {
16617 Second = &LoadedSlices[CurrSlice];
16619 // If First is NULL, it means we start a new pair.
16620 // Get to the next slice.
16621 if (!First)
16622 continue;
16624 EVT LoadedType = First->getLoadedType();
16626 // If the types of the slices are different, we cannot pair them.
16627 if (LoadedType != Second->getLoadedType())
16628 continue;
16630 // Check if the target supplies paired loads for this type.
16631 Align RequiredAlignment;
16632 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16633 // Move to the next pair; this type is hopeless.
16634 Second = nullptr;
16635 continue;
16637 // Check if we meet the alignment requirement.
16638 if (First->getAlign() < RequiredAlignment)
16639 continue;
16641 // Check that both loads are next to each other in memory.
16642 if (!areSlicesNextToEachOther(*First, *Second))
16643 continue;
16645 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16646 --GlobalLSCost.Loads;
16647 // Move to the next pair.
16648 Second = nullptr;
16652 /// Check the profitability of all involved LoadedSlice.
16653 /// Currently, it is considered profitable if there are exactly two
16654 /// involved slices (1) which are (2) next to each other in memory, and
16655 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16657 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16658 /// the elements themselves.
16660 /// FIXME: When the cost model is mature enough, we can relax
16661 /// constraints (1) and (2).
16662 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16663 const APInt &UsedBits, bool ForCodeSize) {
16664 unsigned NumberOfSlices = LoadedSlices.size();
16665 if (StressLoadSlicing)
16666 return NumberOfSlices > 1;
16668 // Check (1).
16669 if (NumberOfSlices != 2)
16670 return false;
16672 // Check (2).
16673 if (!areUsedBitsDense(UsedBits))
16674 return false;
16676 // Check (3).
16677 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16678 // The original code has one big load.
16679 OrigCost.Loads = 1;
16680 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16681 const LoadedSlice &LS = LoadedSlices[CurrSlice];
16682 // Accumulate the cost of all the slices.
16683 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16684 GlobalSlicingCost += SliceCost;
16686 // Account as cost in the original configuration the gain obtained
16687 // with the current slices.
16688 OrigCost.addSliceGain(LS);
16691 // If the target supports paired load, adjust the cost accordingly.
16692 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16693 return OrigCost > GlobalSlicingCost;
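// For instance (illustrative), slicing one i64 load into two i32 slices
// trades 1 load for 2, but removes a truncate and a shift from each slice;
// if the target can pair the two narrow loads, adjustCostForPairing charges
// only one load for both slices and the slicing becomes profitable.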
16696 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
16697 /// operations, split it into the various pieces being extracted.
16699 /// This sort of thing is introduced by SROA.
16700 /// This slicing takes care not to insert overlapping loads.
16701 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
16702 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16703 if (Level < AfterLegalizeDAG)
16704 return false;
16706 LoadSDNode *LD = cast<LoadSDNode>(N);
16707 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16708 !LD->getValueType(0).isInteger())
16709 return false;
16711 // The algorithm to split up a load of a scalable vector into individual
16712 // elements currently requires knowing the length of the loaded type,
16713 // so will need adjusting to work on scalable vectors.
16714 if (LD->getValueType(0).isScalableVector())
16715 return false;
16717 // Keep track of already used bits to detect overlapping values.
16718 // In that case, we will just abort the transformation.
16719 APInt UsedBits(LD->getValueSizeInBits(0), 0);
16721 SmallVector<LoadedSlice, 4> LoadedSlices;
16723 // Check if this load is used as several smaller chunks of bits.
16724 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16725 // of computation for each trunc.
16726 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16727 UI != UIEnd; ++UI) {
16728 // Skip the uses of the chain.
16729 if (UI.getUse().getResNo() != 0)
16730 continue;
16732 SDNode *User = *UI;
16733 unsigned Shift = 0;
16735 // Check if this is a trunc(lshr).
16736 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16737 isa<ConstantSDNode>(User->getOperand(1))) {
16738 Shift = User->getConstantOperandVal(1);
16739 User = *User->use_begin();
16742 // At this point, User is a truncate iff we encountered trunc or
16743 // trunc(lshr).
16744 if (User->getOpcode() != ISD::TRUNCATE)
16745 return false;
16747 // The width of the type must be a power of 2 and at least 8 bits.
16748 // Otherwise the load cannot be represented in LLVM IR.
16749 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
16750 // will not be byte-aligned. We do not support that.
16751 unsigned Width = User->getValueSizeInBits(0);
16752 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16753 return false;
16755 // Build the slice for this chain of computations.
16756 LoadedSlice LS(User, LD, Shift, &DAG);
16757 APInt CurrentUsedBits = LS.getUsedBits();
16759 // Check if this slice overlaps with another.
16760 if ((CurrentUsedBits & UsedBits) != 0)
16761 return false;
16762 // Update the bits used globally.
16763 UsedBits |= CurrentUsedBits;
16765 // Check if the new slice would be legal.
16766 if (!LS.isLegal())
16767 return false;
16769 // Record the slice.
16770 LoadedSlices.push_back(LS);
16773 // Abort slicing if it does not seem to be profitable.
16774 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16775 return false;
16777 ++SlicedLoads;
16779 // Rewrite each chain to use an independent load.
16780 // By construction, each chain can be represented by a unique load.
16782 // Prepare the arguments for the new token factor for all the slices.
16783 SmallVector<SDValue, 8> ArgChains;
16784 for (const LoadedSlice &LS : LoadedSlices) {
16785 SDValue SliceInst = LS.loadSlice();
16786 CombineTo(LS.Inst, SliceInst, true);
16787 if (SliceInst.getOpcode() != ISD::LOAD)
16788 SliceInst = SliceInst.getOperand(0);
16789 assert(SliceInst->getOpcode() == ISD::LOAD &&
16790 "It takes more than a zext to get to the loaded slice!!");
16791 ArgChains.push_back(SliceInst.getValue(1));
16794 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16795 ArgChains);
16796 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16797 AddToWorklist(Chain.getNode());
16798 return true;
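// Overall effect of SliceUpLoad on a typical SROA-produced pattern
// (little-endian, illustrative types):
//   x  = load i32 @p
//   lo = trunc x to i16
//   hi = trunc (srl x, 16) to i16
// is rewritten as two independent narrow loads:
//   lo = load i16 @p
//   hi = load i16 @(p + 2)
// with a token factor tying the new chains to the original load's chain uses.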
16801 /// Check to see if V is (and (load ptr), imm), where the load has
16802 /// specific bytes cleared out. If so, return the byte size being masked out
16803 /// and the shift amount.
16804 static std::pair<unsigned, unsigned>
16805 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16806 std::pair<unsigned, unsigned> Result(0, 0);
16808 // Check for the structure we're looking for.
16809 if (V->getOpcode() != ISD::AND ||
16810 !isa<ConstantSDNode>(V->getOperand(1)) ||
16811 !ISD::isNormalLoad(V->getOperand(0).getNode()))
16812 return Result;
16814 // Check the chain and pointer.
16815 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16816 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
16818 // This only handles simple types.
16819 if (V.getValueType() != MVT::i16 &&
16820 V.getValueType() != MVT::i32 &&
16821 V.getValueType() != MVT::i64)
16822 return Result;
16824 // Check the constant mask. Invert it so that the bits being masked out are
16825 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
16826 // follow the sign bit for uniformity.
16827 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16828 unsigned NotMaskLZ = countLeadingZeros(NotMask);
16829 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
16830 unsigned NotMaskTZ = countTrailingZeros(NotMask);
16831 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
16832 if (NotMaskLZ == 64) return Result; // All zero mask.
16834 // See if we have a continuous run of bits. If so, we have 0*1+0*
16835 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16836 return Result;
16838 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16839 if (V.getValueType() != MVT::i64 && NotMaskLZ)
16840 NotMaskLZ -= 64-V.getValueSizeInBits();
16842 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16843 switch (MaskedBytes) {
16844 case 1:
16845 case 2:
16846 case 4: break;
16847 default: return Result; // All one mask, or 5-byte mask.
16850 // Verify that the masked region starts at a byte offset that is a multiple
16851 // of its width, so that the access is aligned the same as the access width.
16852 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16854 // For narrowing to be valid, it must be the case that the load is the
16855 // memory operation immediately preceding the store.
16856 if (LD == Chain.getNode())
16857 ; // ok.
16858 else if (Chain->getOpcode() == ISD::TokenFactor &&
16859 SDValue(LD, 1).hasOneUse()) {
16860 // LD has only 1 chain use, so there are no indirect dependencies.
16861 if (!LD->isOperandOf(Chain.getNode()))
16862 return Result;
16863 } else
16864 return Result; // Fail.
16866 Result.first = MaskedBytes;
16867 Result.second = NotMaskTZ/8;
16868 return Result;
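// Worked example (illustrative i32 values): for
//   V = (and (load p), 0xFFFF00FF)
// the inverted mask is 0x0000FF00, i.e. exactly one byte starting at byte
// offset 1 is cleared, so this returns {1 /*MaskedBytes*/, 1 /*ByteShift*/}.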
16871 /// Check to see if IVal is something that provides a value as specified by
16872 /// MaskInfo. If so, replace the specified store with a narrower store of
16873 /// truncated IVal.
16874 static SDValue
16875 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16876 SDValue IVal, StoreSDNode *St,
16877 DAGCombiner *DC) {
16878 unsigned NumBytes = MaskInfo.first;
16879 unsigned ByteShift = MaskInfo.second;
16880 SelectionDAG &DAG = DC->getDAG();
16882 // Check to see if IVal is all zeros in the part being masked in by the 'or'
16883 // that uses this. If not, this is not a replacement.
16884 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16885 ByteShift*8, (ByteShift+NumBytes)*8);
16886 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16888 // Check that it is legal on the target to do this. It is legal if the new
16889 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16890 // legalization (and the target doesn't explicitly think this is a bad idea).
16891 MVT VT = MVT::getIntegerVT(NumBytes * 8);
16892 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16893 if (!DC->isTypeLegal(VT))
16894 return SDValue();
16895 if (St->getMemOperand() &&
16896 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16897 *St->getMemOperand()))
16898 return SDValue();
16900 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
16901 // shifted by ByteShift and truncated down to NumBytes.
16902 if (ByteShift) {
16903 SDLoc DL(IVal);
16904 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16905 DAG.getConstant(ByteShift*8, DL,
16906 DC->getShiftAmountTy(IVal.getValueType())));
16909 // Figure out the offset for the store and the alignment of the access.
16910 unsigned StOffset;
16911 if (DAG.getDataLayout().isLittleEndian())
16912 StOffset = ByteShift;
16913 else
16914 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16916 SDValue Ptr = St->getBasePtr();
16917 if (StOffset) {
16918 SDLoc DL(IVal);
16919 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16922 // Truncate down to the new size.
16923 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16925 ++OpsNarrowed;
16926 return DAG
16927 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16928 St->getPointerInfo().getWithOffset(StOffset),
16929 St->getOriginalAlign());
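// Putting the two helpers together (illustrative, little-endian): for
//   store (or (and (load p), 0xFFFF00FF), Y), p
// where Y is known to be zero outside byte 1, the whole load/or/store is
// replaced by a single i8 store of (trunc (srl Y, 8)) at address p + 1,
// which makes the wide load dead.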
16932 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16933 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16934 /// narrowing the load and store if it would end up being a win for performance
16935 /// or code size.
16936 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16937 StoreSDNode *ST = cast<StoreSDNode>(N);
16938 if (!ST->isSimple())
16939 return SDValue();
16941 SDValue Chain = ST->getChain();
16942 SDValue Value = ST->getValue();
16943 SDValue Ptr = ST->getBasePtr();
16944 EVT VT = Value.getValueType();
16946 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16947 return SDValue();
16949 unsigned Opc = Value.getOpcode();
16951 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16952 // is a byte mask indicating a consecutive number of bytes, check to see if
16953 // Y is known to provide just those bytes. If so, we try to replace the
16954 // load / or / store sequence with a single (narrower) store, which makes
16955 // the load dead.
16956 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16957 std::pair<unsigned, unsigned> MaskedLoad;
16958 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16959 if (MaskedLoad.first)
16960 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16961 Value.getOperand(1), ST,this))
16962 return NewST;
16964 // Or is commutative, so try swapping X and Y.
16965 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16966 if (MaskedLoad.first)
16967 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16968 Value.getOperand(0), ST,this))
16969 return NewST;
16972 if (!EnableReduceLoadOpStoreWidth)
16973 return SDValue();
16975 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16976 Value.getOperand(1).getOpcode() != ISD::Constant)
16977 return SDValue();
16979 SDValue N0 = Value.getOperand(0);
16980 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16981 Chain == SDValue(N0.getNode(), 1)) {
16982 LoadSDNode *LD = cast<LoadSDNode>(N0);
16983 if (LD->getBasePtr() != Ptr ||
16984 LD->getPointerInfo().getAddrSpace() !=
16985 ST->getPointerInfo().getAddrSpace())
16986 return SDValue();
16988 // Find the type to narrow the load / op / store to.
16989 SDValue N1 = Value.getOperand(1);
16990 unsigned BitWidth = N1.getValueSizeInBits();
16991 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16992 if (Opc == ISD::AND)
16993 Imm ^= APInt::getAllOnes(BitWidth);
16994 if (Imm == 0 || Imm.isAllOnes())
16995 return SDValue();
16996 unsigned ShAmt = Imm.countTrailingZeros();
16997 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16998 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16999 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17000 // The narrowing should be profitable, the load/store operation should be
17001 // legal (or custom) and the store size should be equal to the NewVT width.
17002 while (NewBW < BitWidth &&
17003 (NewVT.getStoreSizeInBits() != NewBW ||
17004 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17005 !TLI.isNarrowingProfitable(VT, NewVT))) {
17006 NewBW = NextPowerOf2(NewBW);
17007 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17009 if (NewBW >= BitWidth)
17010 return SDValue();
17012 // If the lowest changed bit does not start at a type bitwidth boundary,
17013 // start at the previous one.
17014 if (ShAmt % NewBW)
17015 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17016 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17017 std::min(BitWidth, ShAmt + NewBW));
17018 if ((Imm & Mask) == Imm) {
17019 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17020 if (Opc == ISD::AND)
17021 NewImm ^= APInt::getAllOnes(NewBW);
17022 uint64_t PtrOff = ShAmt / 8;
17023 // For big endian targets, we need to adjust the offset to the pointer to
17024 // load the correct bytes.
17025 if (DAG.getDataLayout().isBigEndian())
17026 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17028 bool IsFast = false;
17029 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17030 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17031 LD->getAddressSpace(), NewAlign,
17032 LD->getMemOperand()->getFlags(), &IsFast) ||
17033 !IsFast)
17034 return SDValue();
17036 SDValue NewPtr =
17037 DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17038 SDValue NewLD =
17039 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17040 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17041 LD->getMemOperand()->getFlags(), LD->getAAInfo());
17042 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17043 DAG.getConstant(NewImm, SDLoc(Value),
17044 NewVT));
17045 SDValue NewST =
17046 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17047 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17049 AddToWorklist(NewPtr.getNode());
17050 AddToWorklist(NewLD.getNode());
17051 AddToWorklist(NewVal.getNode());
17052 WorklistRemover DeadNodes(*this);
17053 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17054 ++OpsNarrowed;
17055 return NewST;
17059 return SDValue();
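// Worked example for the narrowing path above (illustrative, little-endian):
//   store (or (load p:i32), 0x00FF0000), p
// only changes byte 2 of the stored value, so it can be narrowed to
//   store (or (load (p + 2):i8), 0xFF), (p + 2)
// provided the i8 operation is legal and the narrower access is fast.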
17062 /// For a given floating point load / store pair, if the load value isn't used
17063 /// by any other operations, then consider transforming the pair to integer
17064 /// load / store operations if the target deems the transformation profitable.
17065 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17066 StoreSDNode *ST = cast<StoreSDNode>(N);
17067 SDValue Value = ST->getValue();
17068 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17069 Value.hasOneUse()) {
17070 LoadSDNode *LD = cast<LoadSDNode>(Value);
17071 EVT VT = LD->getMemoryVT();
17072 if (!VT.isFloatingPoint() ||
17073 VT != ST->getMemoryVT() ||
17074 LD->isNonTemporal() ||
17075 ST->isNonTemporal() ||
17076 LD->getPointerInfo().getAddrSpace() != 0 ||
17077 ST->getPointerInfo().getAddrSpace() != 0)
17078 return SDValue();
17080 TypeSize VTSize = VT.getSizeInBits();
17082 // We don't know the size of scalable types at compile time so we cannot
17083 // create an integer of the equivalent size.
17084 if (VTSize.isScalable())
17085 return SDValue();
17087 bool FastLD = false, FastST = false;
17088 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17089 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17090 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17091 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17092 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17093 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17094 *LD->getMemOperand(), &FastLD) ||
17095 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17096 *ST->getMemOperand(), &FastST) ||
17097 !FastLD || !FastST)
17098 return SDValue();
17100 SDValue NewLD =
17101 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17102 LD->getPointerInfo(), LD->getAlign());
17104 SDValue NewST =
17105 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17106 ST->getPointerInfo(), ST->getAlign());
17108 AddToWorklist(NewLD.getNode());
17109 AddToWorklist(NewST.getNode());
17110 WorklistRemover DeadNodes(*this);
17111 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17112 ++LdStFP2Int;
17113 return NewST;
17116 return SDValue();
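// For example, a pure f64 copy such as
//   store (load f64 @src), @dst
// can become
//   store (load i64 @src), @dst
// when the target reports the integer load/store as both legal and desirable.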
17119 // This is a helper function for visitMUL to check the profitability
17120 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17121 // MulNode is the original multiply, AddNode is (add x, c1),
17122 // and ConstNode is c2.
17124 // If the (add x, c1) has multiple uses, we could increase
17125 // the number of adds if we make this transformation.
17126 // It would only be worth doing this if we can remove a
17127 // multiply in the process. Check for that here.
17128 // To illustrate:
17129 // (A + c1) * c3
17130 // (A + c2) * c3
17131 // We're checking for cases where we have common "c3 * A" expressions.
17132 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
17133 SDValue &AddNode,
17134 SDValue &ConstNode) {
17135 APInt Val;
17137 // If the add only has one use, and the target thinks the folding is
17138 // profitable or does not lead to worse code, this would be OK to do.
17139 if (AddNode.getNode()->hasOneUse() &&
17140 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17141 return true;
17143 // Walk all the users of the constant with which we're multiplying.
17144 for (SDNode *Use : ConstNode->uses()) {
17145 if (Use == MulNode) // This use is the one we're on right now. Skip it.
17146 continue;
17148 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17149 SDNode *OtherOp;
17150 SDNode *MulVar = AddNode.getOperand(0).getNode();
17152 // OtherOp is what we're multiplying against the constant.
17153 if (Use->getOperand(0) == ConstNode)
17154 OtherOp = Use->getOperand(1).getNode();
17155 else
17156 OtherOp = Use->getOperand(0).getNode();
17158 // Check to see if the multiply is with the same operand as our "add".
17160 // ConstNode = CONST
17161 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
17162 // ...
17163 // AddNode = (A + c1) <-- MulVar is A.
17164 // = AddNode * ConstNode <-- current visiting instruction.
17166 // If we make this transformation, we will have a common
17167 // multiply (ConstNode * A) that we can save.
17168 if (OtherOp == MulVar)
17169 return true;
17171 // Now check to see if a future expansion will give us a common
17172 // multiply.
17174 // ConstNode = CONST
17175 // AddNode = (A + c1)
17176 // ... = AddNode * ConstNode <-- current visiting instruction.
17177 // ...
17178 // OtherOp = (A + c2)
17179 // Use = OtherOp * ConstNode <-- visiting Use.
17181 // If we make this transformation, we will have a common
17182 // multiply (CONST * A) after we also do the same transformation
17183 // to the "t2" instruction.
17184 if (OtherOp->getOpcode() == ISD::ADD &&
17185 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17186 OtherOp->getOperand(0).getNode() == MulVar)
17187 return true;
17191 // Didn't find a case where this would be profitable.
17192 return false;
17195 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17196 unsigned NumStores) {
17197 SmallVector<SDValue, 8> Chains;
17198 SmallPtrSet<const SDNode *, 8> Visited;
17199 SDLoc StoreDL(StoreNodes[0].MemNode);
17201 for (unsigned i = 0; i < NumStores; ++i) {
17202 Visited.insert(StoreNodes[i].MemNode);
17205 // don't include nodes that are children or repeated nodes.
17206 for (unsigned i = 0; i < NumStores; ++i) {
17207 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17208 Chains.push_back(StoreNodes[i].MemNode->getChain());
17211 assert(Chains.size() > 0 && "Chain should have generated a chain");
17212 return DAG.getTokenFactor(StoreDL, Chains);
17215 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17216 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17217 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17218 // Make sure we have something to merge.
17219 if (NumStores < 2)
17220 return false;
17222 assert((!UseTrunc || !UseVector) &&
17223 "This optimization cannot emit a vector truncating store");
17225 // The latest Node in the DAG.
17226 SDLoc DL(StoreNodes[0].MemNode);
17228 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17229 unsigned SizeInBits = NumStores * ElementSizeBits;
17230 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17232 Optional<MachineMemOperand::Flags> Flags;
17233 AAMDNodes AAInfo;
17234 for (unsigned I = 0; I != NumStores; ++I) {
17235 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17236 if (!Flags) {
17237 Flags = St->getMemOperand()->getFlags();
17238 AAInfo = St->getAAInfo();
17239 continue;
17241 // Skip merging if there's an inconsistent flag.
17242 if (Flags != St->getMemOperand()->getFlags())
17243 return false;
17244 // Concatenate AA metadata.
17245 AAInfo = AAInfo.concat(St->getAAInfo());
17248 EVT StoreTy;
17249 if (UseVector) {
17250 unsigned Elts = NumStores * NumMemElts;
17251 // Get the type for the merged vector store.
17252 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17253 } else
17254 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17256 SDValue StoredVal;
17257 if (UseVector) {
17258 if (IsConstantSrc) {
17259 SmallVector<SDValue, 8> BuildVector;
17260 for (unsigned I = 0; I != NumStores; ++I) {
17261 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17262 SDValue Val = St->getValue();
17263 // If constant is of the wrong type, convert it now.
17264 if (MemVT != Val.getValueType()) {
17265 Val = peekThroughBitcasts(Val);
17266 // Deal with constants of wrong size.
17267 if (ElementSizeBits != Val.getValueSizeInBits()) {
17268 EVT IntMemVT =
17269 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17270 if (isa<ConstantFPSDNode>(Val)) {
17271 // Not clear how to truncate FP values.
17272 return false;
17273 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17274 Val = DAG.getConstant(C->getAPIntValue()
17275 .zextOrTrunc(Val.getValueSizeInBits())
17276 .zextOrTrunc(ElementSizeBits),
17277 SDLoc(C), IntMemVT);
17279 // Make sure the correctly sized value also has the correct type.
17280 Val = DAG.getBitcast(MemVT, Val);
17282 BuildVector.push_back(Val);
17284 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17285 : ISD::BUILD_VECTOR,
17286 DL, StoreTy, BuildVector);
17287 } else {
17288 SmallVector<SDValue, 8> Ops;
17289 for (unsigned i = 0; i < NumStores; ++i) {
17290 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17291 SDValue Val = peekThroughBitcasts(St->getValue());
17292 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17293 // type MemVT. If the underlying value is not the correct
17294 // type, but it is an extraction of an appropriate vector we
17295 // can recast Val to be of the correct type. This may require
17296 // converting between EXTRACT_VECTOR_ELT and
17297 // EXTRACT_SUBVECTOR.
17298 if ((MemVT != Val.getValueType()) &&
17299 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17300 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17301 EVT MemVTScalarTy = MemVT.getScalarType();
17302 // We may need to add a bitcast here to get types to line up.
17303 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17304 Val = DAG.getBitcast(MemVT, Val);
17305 } else {
17306 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17307 : ISD::EXTRACT_VECTOR_ELT;
17308 SDValue Vec = Val.getOperand(0);
17309 SDValue Idx = Val.getOperand(1);
17310 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17313 Ops.push_back(Val);
17316 // Build the extracted vector elements back into a vector.
17317 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17318 : ISD::BUILD_VECTOR,
17319 DL, StoreTy, Ops);
17321 } else {
17322 // We should always use a vector store when merging extracted vector
17323 // elements, so this path implies a store of constants.
17324 assert(IsConstantSrc && "Merged vector elements should use vector store");
17326 APInt StoreInt(SizeInBits, 0);
17328 // Construct a single integer constant which is made of the smaller
17329 // constant inputs.
17330 bool IsLE = DAG.getDataLayout().isLittleEndian();
17331 for (unsigned i = 0; i < NumStores; ++i) {
17332 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17333 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17335 SDValue Val = St->getValue();
17336 Val = peekThroughBitcasts(Val);
17337 StoreInt <<= ElementSizeBits;
17338 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17339 StoreInt |= C->getAPIntValue()
17340 .zextOrTrunc(ElementSizeBits)
17341 .zextOrTrunc(SizeInBits);
17342 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17343 StoreInt |= C->getValueAPF()
17344 .bitcastToAPInt()
17345 .zextOrTrunc(ElementSizeBits)
17346 .zextOrTrunc(SizeInBits);
17347 // If fp truncation is necessary give up for now.
17348 if (MemVT.getSizeInBits() != ElementSizeBits)
17349 return false;
17350 } else {
17351 llvm_unreachable("Invalid constant element type");
17355 // Create the new Load and Store operations.
17356 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17359 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17360 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17362 // Make sure we use a trunc store if it's necessary to be legal.
17363 SDValue NewStore;
17364 if (!UseTrunc) {
17365 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17366 FirstInChain->getPointerInfo(),
17367 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17368 } else { // Must be realized as a trunc store
17369 EVT LegalizedStoredValTy =
17370 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17371 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17372 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17373 SDValue ExtendedStoreVal =
17374 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17375 LegalizedStoredValTy);
17376 NewStore = DAG.getTruncStore(
17377 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17378 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17379 FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17382 // Replace all merged stores with the new store.
17383 for (unsigned i = 0; i < NumStores; ++i)
17384 CombineTo(StoreNodes[i].MemNode, NewStore);
17386 AddToWorklist(NewChain.getNode());
17387 return true;
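// For example (illustrative, little-endian), four consecutive i8 constant
// stores of 0x01, 0x02, 0x03, 0x04 at offsets 0..3 can be merged into a
// single i32 store of 0x04030201, or into a v4i8 vector store when UseVector
// is set and that vector type is legal.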
17390 void DAGCombiner::getStoreMergeCandidates(
17391 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17392 SDNode *&RootNode) {
17393 // This holds the base pointer, index, and the offset in bytes from the base
17394 // pointer. We must have a base and an offset. Do not handle stores to undef
17395 // base pointers.
17396 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17397 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17398 return;
17400 SDValue Val = peekThroughBitcasts(St->getValue());
17401 StoreSource StoreSrc = getStoreSource(Val);
17402 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17404 // Match on the load base pointer if relevant.
17405 EVT MemVT = St->getMemoryVT();
17406 BaseIndexOffset LBasePtr;
17407 EVT LoadVT;
17408 if (StoreSrc == StoreSource::Load) {
17409 auto *Ld = cast<LoadSDNode>(Val);
17410 LBasePtr = BaseIndexOffset::match(Ld, DAG);
17411 LoadVT = Ld->getMemoryVT();
17412 // Load and store should be the same type.
17413 if (MemVT != LoadVT)
17414 return;
17415 // Loads must only have one use.
17416 if (!Ld->hasNUsesOfValue(1, 0))
17417 return;
17418 // The memory operands must not be volatile/indexed/atomic.
17419 // TODO: May be able to relax for unordered atomics (see D66309)
17420 if (!Ld->isSimple() || Ld->isIndexed())
17421 return;
17423 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17424 int64_t &Offset) -> bool {
17425 // The memory operands must not be volatile/indexed/atomic.
17426 // TODO: May be able to relax for unordered atomics (see D66309)
17427 if (!Other->isSimple() || Other->isIndexed())
17428 return false;
17429 // Don't mix temporal stores with non-temporal stores.
17430 if (St->isNonTemporal() != Other->isNonTemporal())
17431 return false;
17432 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17433 // Allow merging constants of different types as integers.
17434 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17435 : Other->getMemoryVT() != MemVT;
17436 switch (StoreSrc) {
17437 case StoreSource::Load: {
17438 if (NoTypeMatch)
17439 return false;
17440 // The Load's Base Ptr must also match.
17441 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17442 if (!OtherLd)
17443 return false;
17444 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17445 if (LoadVT != OtherLd->getMemoryVT())
17446 return false;
17447 // Loads must only have one use.
17448 if (!OtherLd->hasNUsesOfValue(1, 0))
17449 return false;
17450 // The memory operands must not be volatile/indexed/atomic.
17451 // TODO: May be able to relax for unordered atomics (see D66309)
17452 if (!OtherLd->isSimple() || OtherLd->isIndexed())
17453 return false;
17454 // Don't mix temporal loads with non-temporal loads.
17455 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17456 return false;
17457 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17458 return false;
17459 break;
17461 case StoreSource::Constant:
17462 if (NoTypeMatch)
17463 return false;
17464 if (!isIntOrFPConstant(OtherBC))
17465 return false;
17466 break;
17467 case StoreSource::Extract:
17468 // Do not merge truncated stores here.
17469 if (Other->isTruncatingStore())
17470 return false;
17471 if (!MemVT.bitsEq(OtherBC.getValueType()))
17472 return false;
17473 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17474 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17475 return false;
17476 break;
17477 default:
17478 llvm_unreachable("Unhandled store source for merging");
17480 Ptr = BaseIndexOffset::match(Other, DAG);
17481 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17484 // Check if the pair of StoreNode and RootNode has already bailed out of the
17485 // dependence check more times than the limit allows.
17486 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17487 SDNode *RootNode) -> bool {
17488 auto RootCount = StoreRootCountMap.find(StoreNode);
17489 return RootCount != StoreRootCountMap.end() &&
17490 RootCount->second.first == RootNode &&
17491 RootCount->second.second > StoreMergeDependenceLimit;
17494 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17495 // This must be a chain use.
17496 if (UseIter.getOperandNo() != 0)
17497 return;
17498 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17499 BaseIndexOffset Ptr;
17500 int64_t PtrDiff;
17501 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17502 !OverLimitInDependenceCheck(OtherStore, RootNode))
17503 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17507 // We are looking for a root node which is an ancestor to all mergeable
17508 // stores. We search up through a load, to our root and then down
17509 // through all children. For instance we will find Store{1,2,3} if
17510 // St is Store1, Store2, or Store3, where the root is not a load,
17511 // which is always true for nonvolatile ops. TODO: Expand
17512 // the search to find all valid candidates through multiple layers of loads.
17514 // Root
17515 // |-------|-------|
17516 // Load Load Store3
17517 // | |
17518 // Store1 Store2
17520 // FIXME: We should be able to climb and
17521 // descend TokenFactors to find candidates as well.
17523 RootNode = St->getChain().getNode();
17525 unsigned NumNodesExplored = 0;
17526 const unsigned MaxSearchNodes = 1024;
17527 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17528 RootNode = Ldn->getChain().getNode();
17529 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17530 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17531 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17532 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17533 TryToAddCandidate(I2);
17535 // Check stores that depend on the root (e.g. Store 3 in the chart above).
17536 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17537 TryToAddCandidate(I);
17540 } else {
17541 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17542 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17543 TryToAddCandidate(I);
17547 // We need to check that merging these stores does not cause a loop in
17548 // the DAG. Any store candidate may depend on another candidate
17549 // indirectly through its operand (we already consider dependencies
17550 // through the chain). Check in parallel by searching up from
17551 // non-chain operands of candidates.
17552 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17553 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17554 SDNode *RootNode) {
17555 // FIXME: We should be able to truncate a full search of
17556 // predecessors by doing a BFS and keeping tabs on the originating
17557 // stores from which worklist nodes come, in a similar way to
17558 // TokenFactor simplification.
17560 SmallPtrSet<const SDNode *, 32> Visited;
17561 SmallVector<const SDNode *, 8> Worklist;
17563 // RootNode is a predecessor to all candidates so we need not search
17564 // past it. Add RootNode (peeking through TokenFactors). Do not count
17565 // these towards size check.
17567 Worklist.push_back(RootNode);
17568 while (!Worklist.empty()) {
17569 auto N = Worklist.pop_back_val();
17570 if (!Visited.insert(N).second)
17571 continue; // Already present in Visited.
17572 if (N->getOpcode() == ISD::TokenFactor) {
17573 for (SDValue Op : N->ops())
17574 Worklist.push_back(Op.getNode());
17578 // Don't count pruning nodes towards max.
17579 unsigned int Max = 1024 + Visited.size();
17580 // Search Ops of store candidates.
17581 for (unsigned i = 0; i < NumStores; ++i) {
17582 SDNode *N = StoreNodes[i].MemNode;
17583 // Of the 4 Store Operands:
17584 // * Chain (Op 0) -> We have already considered these
17585 // in candidate selection and can be
17586 // safely ignored
17587 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17588 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17589 // but aren't necessarily from the same base node, so
17590 // cycles are possible (e.g. via indexed store).
17591 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17592 // non-indexed stores). Not constant on all targets (e.g. ARM)
17593 // and so can participate in a cycle.
17594 for (unsigned j = 1; j < N->getNumOperands(); ++j)
17595 Worklist.push_back(N->getOperand(j).getNode());
17597 // Search through DAG. We can stop early if we find a store node.
17598 for (unsigned i = 0; i < NumStores; ++i)
17599 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17600 Max)) {
17601 // If the search bails out, record the StoreNode and RootNode in the
17602 // StoreRootCountMap. If we have seen the pair more times than the limit,
17603 // we won't add the StoreNode into the StoreNodes set again.
17604 if (Visited.size() >= Max) {
17605 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17606 if (RootCount.first == RootNode)
17607 RootCount.second++;
17608 else
17609 RootCount = {RootNode, 1};
17611 return false;
17613 return true;
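// A cycle could arise, for example, if one candidate store's value operand
// were a load whose chain transitively depended on another candidate store;
// merging the two would make the merged store a predecessor of itself.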
17616 unsigned
17617 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17618 int64_t ElementSizeBytes) const {
17619 while (true) {
17620 // Find a store past the width of the first store.
17621 size_t StartIdx = 0;
17622 while ((StartIdx + 1 < StoreNodes.size()) &&
17623 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17624 StoreNodes[StartIdx + 1].OffsetFromBase)
17625 ++StartIdx;
17627 // Bail if we don't have enough candidates to merge.
17628 if (StartIdx + 1 >= StoreNodes.size())
17629 return 0;
17631 // Trim stores that overlapped with the first store.
17632 if (StartIdx)
17633 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17635 // Scan the memory operations on the chain and find the first
17636 // non-consecutive store memory address.
17637 unsigned NumConsecutiveStores = 1;
17638 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17639 // Check that the addresses are consecutive starting from the second
17640 // element in the list of stores.
17641 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17642 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17643 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17644 break;
17645 NumConsecutiveStores = i + 1;
17647 if (NumConsecutiveStores > 1)
17648 return NumConsecutiveStores;
17650 // There are no consecutive stores at the start of the list.
17651 // Remove the first store and try again.
17652 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
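// For example, with ElementSizeBytes == 4 and candidate offsets
// {0, 4, 8, 20}, the first three stores are consecutive and this returns 3;
// the store at offset 20 stays in StoreNodes for a later attempt.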
17656 bool DAGCombiner::tryStoreMergeOfConstants(
17657 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17658 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17659 LLVMContext &Context = *DAG.getContext();
17660 const DataLayout &DL = DAG.getDataLayout();
17661 int64_t ElementSizeBytes = MemVT.getStoreSize();
17662 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17663 bool MadeChange = false;
17665 // Store the constants into memory as one consecutive store.
17666 while (NumConsecutiveStores >= 2) {
17667 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17668 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17669 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17670 unsigned LastLegalType = 1;
17671 unsigned LastLegalVectorType = 1;
17672 bool LastIntegerTrunc = false;
17673 bool NonZero = false;
17674 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17675 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17676 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17677 SDValue StoredVal = ST->getValue();
17678 bool IsElementZero = false;
17679 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17680 IsElementZero = C->isZero();
17681 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17682 IsElementZero = C->getConstantFPValue()->isNullValue();
17683 if (IsElementZero) {
17684 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17685 FirstZeroAfterNonZero = i;
17687 NonZero |= !IsElementZero;
17689 // Find a legal type for the constant store.
17690 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17691 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17692 bool IsFast = false;
17694 // Break early when size is too large to be legal.
17695 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17696 break;
17698 if (TLI.isTypeLegal(StoreTy) &&
17699 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17700 DAG.getMachineFunction()) &&
17701 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17702 *FirstInChain->getMemOperand(), &IsFast) &&
17703 IsFast) {
17704 LastIntegerTrunc = false;
17705 LastLegalType = i + 1;
17706 // Or check whether a truncstore is legal.
17707 } else if (TLI.getTypeAction(Context, StoreTy) ==
17708 TargetLowering::TypePromoteInteger) {
17709 EVT LegalizedStoredValTy =
17710 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17711 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17712 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17713 DAG.getMachineFunction()) &&
17714 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17715 *FirstInChain->getMemOperand(), &IsFast) &&
17716 IsFast) {
17717 LastIntegerTrunc = true;
17718 LastLegalType = i + 1;
17722 // We only use vectors if the constant is known to be zero or the
17723 // target allows it and the function is not marked with the
17724 // noimplicitfloat attribute.
17725 if ((!NonZero ||
17726 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17727 AllowVectors) {
17728 // Find a legal type for the vector store.
17729 unsigned Elts = (i + 1) * NumMemElts;
17730 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17731 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17732 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17733 TLI.allowsMemoryAccess(Context, DL, Ty,
17734 *FirstInChain->getMemOperand(), &IsFast) &&
17735 IsFast)
17736 LastLegalVectorType = i + 1;
17740 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17741 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17742 bool UseTrunc = LastIntegerTrunc && !UseVector;
17744 // Check if we found a legal integer type that creates a meaningful
17745 // merge.
17746 if (NumElem < 2) {
17747 // We know that candidate stores are in order and of correct
17748 // shape. While there is no mergeable sequence from the
17749 // beginning one may start later in the sequence. The only
17750 // reason a merge of size N could have failed where another of
17751 // the same size would not have, is if the alignment has
17752 // improved or we've dropped a non-zero value. Drop as many
17753 // candidates as we can here.
17754 unsigned NumSkip = 1;
17755 while ((NumSkip < NumConsecutiveStores) &&
17756 (NumSkip < FirstZeroAfterNonZero) &&
17757 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17758 NumSkip++;
17760 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17761 NumConsecutiveStores -= NumSkip;
17762 continue;
17765 // Check that we can merge these candidates without causing a cycle.
17766 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17767 RootNode)) {
17768 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17769 NumConsecutiveStores -= NumElem;
17770 continue;
17773 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17774 /*IsConstantSrc*/ true,
17775 UseVector, UseTrunc);
17777 // Remove merged stores for next iteration.
17778 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17779 NumConsecutiveStores -= NumElem;
17781 return MadeChange;
17784 bool DAGCombiner::tryStoreMergeOfExtracts(
17785 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17786 EVT MemVT, SDNode *RootNode) {
17787 LLVMContext &Context = *DAG.getContext();
17788 const DataLayout &DL = DAG.getDataLayout();
17789 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17790 bool MadeChange = false;
17792 // Loop on Consecutive Stores on success.
17793 while (NumConsecutiveStores >= 2) {
17794 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17795 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17796 unsigned FirstStoreAlign = FirstInChain->getAlignment();
17797 unsigned NumStoresToMerge = 1;
17798 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17799 // Find a legal type for the vector store.
17800 unsigned Elts = (i + 1) * NumMemElts;
17801 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17802 bool IsFast = false;
17804 // Break early when size is too large to be legal.
17805 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17806 break;
17808 if (TLI.isTypeLegal(Ty) &&
17809 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17810 TLI.allowsMemoryAccess(Context, DL, Ty,
17811 *FirstInChain->getMemOperand(), &IsFast) &&
17812 IsFast)
17813 NumStoresToMerge = i + 1;
17816 // Check if we found a legal vector type creating a meaningful
17817 // merge.
17818 if (NumStoresToMerge < 2) {
17819 // We know that candidate stores are in order and of correct
17820 // shape. While there is no mergeable sequence from the
17821 // beginning one may start later in the sequence. The only
17822 // reason a merge of size N could have failed where another of
17823 // the same size would not have, is if the alignment has
17824 // improved. Drop as many candidates as we can here.
17825 unsigned NumSkip = 1;
17826 while ((NumSkip < NumConsecutiveStores) &&
17827 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17828 NumSkip++;
17830 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17831 NumConsecutiveStores -= NumSkip;
17832 continue;
17835 // Check that we can merge these candidates without causing a cycle.
17836 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17837 RootNode)) {
17838 StoreNodes.erase(StoreNodes.begin(),
17839 StoreNodes.begin() + NumStoresToMerge);
17840 NumConsecutiveStores -= NumStoresToMerge;
17841 continue;
17844 MadeChange |= mergeStoresOfConstantsOrVecElts(
17845 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17846 /*UseVector*/ true, /*UseTrunc*/ false);
17848 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17849 NumConsecutiveStores -= NumStoresToMerge;
17851 return MadeChange;
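// Merge consecutive stores whose values come from consecutive loads: the run
// of loads becomes one wide load and the run of stores one wide store (or an
// extending load paired with a truncating store when integers are promoted).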
17854 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17855 unsigned NumConsecutiveStores, EVT MemVT,
17856 SDNode *RootNode, bool AllowVectors,
17857 bool IsNonTemporalStore,
17858 bool IsNonTemporalLoad) {
17859 LLVMContext &Context = *DAG.getContext();
17860 const DataLayout &DL = DAG.getDataLayout();
17861 int64_t ElementSizeBytes = MemVT.getStoreSize();
17862 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17863 bool MadeChange = false;
17865 // Look for load nodes which are used by the stored values.
17866 SmallVector<MemOpLink, 8> LoadNodes;
17868 // Find acceptable loads. Loads need to have the same chain (token factor),
17869 // must not be zext, volatile, or indexed, and they must be consecutive.
17870 BaseIndexOffset LdBasePtr;
17872 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17873 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17874 SDValue Val = peekThroughBitcasts(St->getValue());
17875 LoadSDNode *Ld = cast<LoadSDNode>(Val);
17877 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17878 // If this is not the first ptr that we check.
17879 int64_t LdOffset = 0;
17880 if (LdBasePtr.getBase().getNode()) {
17881 // The base ptr must be the same.
17882 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17883 break;
17884 } else {
17885 // Remember this base pointer; all later loads must have the same base.
17886 LdBasePtr = LdPtr;
17889 // We found a potential memory operand to merge.
17890 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
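// Repeatedly try to merge a prefix of the collected store/load pairs, trimming
// candidates from the front until fewer than two stores or loads remain.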
17893 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17894 Align RequiredAlignment;
17895 bool NeedRotate = false;
17896 if (LoadNodes.size() == 2) {
17897 // If we have load/store pair instructions and we only have two values,
17898 // don't bother merging.
17899 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17900 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17901 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17902 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17903 break;
17905 // If the loads are reversed, see if we can rotate the halves into place.
17906 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17907 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17908 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17909 if (Offset0 - Offset1 == ElementSizeBytes &&
17910 (hasOperation(ISD::ROTL, PairVT) ||
17911 hasOperation(ISD::ROTR, PairVT))) {
17912 std::swap(LoadNodes[0], LoadNodes[1]);
17913 NeedRotate = true;
17916 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17917 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17918 Align FirstStoreAlign = FirstInChain->getAlign();
17919 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17921 // Scan the memory operations on the chain and find the first
17922 // non-consecutive load memory address. These variables hold the index in
17923 // the load node array.
17925 unsigned LastConsecutiveLoad = 1;
17927 // This variable refers to the size and not index in the array.
17928 unsigned LastLegalVectorType = 1;
17929 unsigned LastLegalIntegerType = 1;
17930 bool isDereferenceable = true;
17931 bool DoIntegerTruncate = false;
17932 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17933 SDValue LoadChain = FirstLoad->getChain();
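// Walk the consecutive loads and grow the mergeable run while each additional
// element still gives a legal (and fast) vector or integer store type.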
17934 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17935 // All loads must share the same chain.
17936 if (LoadNodes[i].MemNode->getChain() != LoadChain)
17937 break;
17939 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17940 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17941 break;
17942 LastConsecutiveLoad = i;
17944 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17945 isDereferenceable = false;
17947 // Find a legal type for the vector store.
17948 unsigned Elts = (i + 1) * NumMemElts;
17949 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17951 // Break early when size is too large to be legal.
17952 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17953 break;
17955 bool IsFastSt = false;
17956 bool IsFastLd = false;
17957 // Don't try vector types if we need a rotate. We may still fail the
17958 // legality checks for the integer type, but we can't handle the rotate
17959 // case with vectors.
17960 // FIXME: We could use a shuffle in place of the rotate.
17961 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
17962 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17963 DAG.getMachineFunction()) &&
17964 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17965 *FirstInChain->getMemOperand(), &IsFastSt) &&
17966 IsFastSt &&
17967 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17968 *FirstLoad->getMemOperand(), &IsFastLd) &&
17969 IsFastLd) {
17970 LastLegalVectorType = i + 1;
17973 // Find a legal type for the integer store.
17974 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17975 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17976 if (TLI.isTypeLegal(StoreTy) &&
17977 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17978 DAG.getMachineFunction()) &&
17979 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17980 *FirstInChain->getMemOperand(), &IsFastSt) &&
17981 IsFastSt &&
17982 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17983 *FirstLoad->getMemOperand(), &IsFastLd) &&
17984 IsFastLd) {
17985 LastLegalIntegerType = i + 1;
17986 DoIntegerTruncate = false;
17987 // Or check whether a truncstore and extload is legal.
17988 } else if (TLI.getTypeAction(Context, StoreTy) ==
17989 TargetLowering::TypePromoteInteger) {
17990 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17991 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17992 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17993 DAG.getMachineFunction()) &&
17994 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17995 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17996 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17997 TLI.allowsMemoryAccess(Context, DL, StoreTy,
17998 *FirstInChain->getMemOperand(), &IsFastSt) &&
17999 IsFastSt &&
18000 TLI.allowsMemoryAccess(Context, DL, StoreTy,
18001 *FirstLoad->getMemOperand(), &IsFastLd) &&
18002 IsFastLd) {
18003 LastLegalIntegerType = i + 1;
18004 DoIntegerTruncate = true;
18009 // Only use vector types if the vector type is larger than the integer
18010 // type. If they are the same, use integers.
18011 bool UseVectorTy =
18012 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18013 unsigned LastLegalType =
18014 std::max(LastLegalVectorType, LastLegalIntegerType);
18016 // We add +1 here because the LastXXX variables refer to an index (a
18017 // location) while NumElem refers to a count (an array size).
18018 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18019 NumElem = std::min(LastLegalType, NumElem);
18020 Align FirstLoadAlign = FirstLoad->getAlign();
18022 if (NumElem < 2) {
18023 // We know that candidate stores are in order and of correct
18024 // shape. While there is no mergeable sequence from the
18025 // beginning, one may start later in the sequence. The only
18026 // reason a merge of size N could have failed where another of
18027 // the same size would not have is if the alignment of either
18028 // the load or store has improved. Drop as many candidates as we
18029 // can here.
18030 unsigned NumSkip = 1;
18031 while ((NumSkip < LoadNodes.size()) &&
18032 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18033 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18034 NumSkip++;
18035 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18036 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18037 NumConsecutiveStores -= NumSkip;
18038 continue;
18041 // Check that we can merge these candidates without causing a cycle.
18042 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18043 RootNode)) {
18044 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18045 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18046 NumConsecutiveStores -= NumElem;
18047 continue;
18050 // Find if it is better to use vectors or integers to load and store
18051 // to memory.
18052 EVT JointMemOpVT;
18053 if (UseVectorTy) {
18054 // Find a legal type for the vector store.
18055 unsigned Elts = NumElem * NumMemElts;
18056 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18057 } else {
18058 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18059 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18062 SDLoc LoadDL(LoadNodes[0].MemNode);
18063 SDLoc StoreDL(StoreNodes[0].MemNode);
18065 // The merged loads are required to have the same incoming chain, so
18066 // using the first's chain is acceptable.
18068 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18069 AddToWorklist(NewStoreChain.getNode());
18071 MachineMemOperand::Flags LdMMOFlags =
18072 isDereferenceable ? MachineMemOperand::MODereferenceable
18073 : MachineMemOperand::MONone;
18074 if (IsNonTemporalLoad)
18075 LdMMOFlags |= MachineMemOperand::MONonTemporal;
18077 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18078 ? MachineMemOperand::MONonTemporal
18079 : MachineMemOperand::MONone;
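// Emit the wide load and the wide store. The vector / plain integer path may
// also rotate the loaded value; the promoted-integer path uses an extending
// load paired with a truncating store.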
18081 SDValue NewLoad, NewStore;
18082 if (UseVectorTy || !DoIntegerTruncate) {
18083 NewLoad = DAG.getLoad(
18084 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18085 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18086 SDValue StoreOp = NewLoad;
18087 if (NeedRotate) {
18088 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18089 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18090 "Unexpected type for rotate-able load pair");
18091 SDValue RotAmt =
18092 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18093 // Target can convert to the identical ROTR if it does not have ROTL.
18094 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18096 NewStore = DAG.getStore(
18097 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18098 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18099 } else { // This must be the truncstore/extload case
18100 EVT ExtendedTy =
18101 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18102 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18103 FirstLoad->getChain(), FirstLoad->getBasePtr(),
18104 FirstLoad->getPointerInfo(), JointMemOpVT,
18105 FirstLoadAlign, LdMMOFlags);
18106 NewStore = DAG.getTruncStore(
18107 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18108 FirstInChain->getPointerInfo(), JointMemOpVT,
18109 FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18112 // Transfer chain users from old loads to the new load.
18113 for (unsigned i = 0; i < NumElem; ++i) {
18114 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18115 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18116 SDValue(NewLoad.getNode(), 1));
18119 // Replace all stores with the new store. Recursively remove corresponding
18120 // values if they are no longer used.
18121 for (unsigned i = 0; i < NumElem; ++i) {
18122 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18123 CombineTo(StoreNodes[i].MemNode, NewStore);
18124 if (Val.getNode()->use_empty())
18125 recursivelyDeleteUnusedNodes(Val.getNode());
18128 MadeChange = true;
18129 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18130 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18131 NumConsecutiveStores -= NumElem;
18133 return MadeChange;
18136 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18137 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18138 return false;
18140 // TODO: Extend this function to merge stores of scalable vectors.
18141 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18142 // store since we know <vscale x 16 x i8> is exactly twice as large as
18143 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18144 EVT MemVT = St->getMemoryVT();
18145 if (MemVT.isScalableVector())
18146 return false;
18147 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18148 return false;
18150 // This function cannot currently deal with non-byte-sized memory sizes.
18151 int64_t ElementSizeBytes = MemVT.getStoreSize();
18152 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18153 return false;
18155 // Do not bother looking at stored values that are not constants, loads, or
18156 // extracted vector elements.
18157 SDValue StoredVal = peekThroughBitcasts(St->getValue());
18158 const StoreSource StoreSrc = getStoreSource(StoredVal);
18159 if (StoreSrc == StoreSource::Unknown)
18160 return false;
18162 SmallVector<MemOpLink, 8> StoreNodes;
18163 SDNode *RootNode;
18164 // Find potential store merge candidates by searching through the chain sub-DAG.
18165 getStoreMergeCandidates(St, StoreNodes, RootNode);
18167 // Check if there is anything to merge.
18168 if (StoreNodes.size() < 2)
18169 return false;
18171 // Sort the memory operands according to their distance from the
18172 // base pointer.
18173 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18174 return LHS.OffsetFromBase < RHS.OffsetFromBase;
18177 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18178 Attribute::NoImplicitFloat);
18179 bool IsNonTemporalStore = St->isNonTemporal();
18180 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18181 cast<LoadSDNode>(StoredVal)->isNonTemporal();
18183 // Store merging attempts to merge the lowest stores first. This generally
18184 // works out, since on success the remaining stores are re-checked
18185 // after the first collection of stores is merged. However, in the
18186 // case that a non-mergeable store is found first, e.g., {p[-2],
18187 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18188 // mergeable cases. To prevent this, we prune such stores from the
18189 // front of StoreNodes here.
18190 bool MadeChange = false;
18191 while (StoreNodes.size() > 1) {
18192 unsigned NumConsecutiveStores =
18193 getConsecutiveStores(StoreNodes, ElementSizeBytes);
18194 // There are no more stores in the list to examine.
18195 if (NumConsecutiveStores == 0)
18196 return MadeChange;
18198 // We have at least 2 consecutive stores. Try to merge them.
18199 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18200 switch (StoreSrc) {
18201 case StoreSource::Constant:
18202 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18203 MemVT, RootNode, AllowVectors);
18204 break;
18206 case StoreSource::Extract:
18207 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18208 MemVT, RootNode);
18209 break;
18211 case StoreSource::Load:
18212 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18213 MemVT, RootNode, AllowVectors,
18214 IsNonTemporalStore, IsNonTemporalLoad);
18215 break;
18217 default:
18218 llvm_unreachable("Unhandled store source type");
18221 return MadeChange;
18224 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18225 SDLoc SL(ST);
18226 SDValue ReplStore;
18228 // Replace the chain to avoid dependency.
18229 if (ST->isTruncatingStore()) {
18230 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18231 ST->getBasePtr(), ST->getMemoryVT(),
18232 ST->getMemOperand());
18233 } else {
18234 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18235 ST->getMemOperand());
18238 // Create token to keep both nodes around.
18239 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18240 MVT::Other, ST->getChain(), ReplStore);
18242 // Make sure the new and old chains are cleaned up.
18243 AddToWorklist(Token.getNode());
18245 // Don't add users to work list.
18246 return CombineTo(ST, Token, false);
18249 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18250 SDValue Value = ST->getValue();
18251 if (Value.getOpcode() == ISD::TargetConstantFP)
18252 return SDValue();
18254 if (!ISD::isNormalStore(ST))
18255 return SDValue();
18257 SDLoc DL(ST);
18259 SDValue Chain = ST->getChain();
18260 SDValue Ptr = ST->getBasePtr();
18262 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18264 // NOTE: If the original store is volatile, this transform must not increase
18265 // the number of stores. For example, on x86-32 an f64 can be stored in one
18266 // processor operation but an i64 (which is not legal) requires two. So the
18267 // transform should not be done in this case.
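// Bitcast the FP constant to integer and store the integer bits instead, when
// such a store is (or will become) legal; f64 may also be split into two i32
// stores.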
18269 SDValue Tmp;
18270 switch (CFP->getSimpleValueType(0).SimpleTy) {
18271 default:
18272 llvm_unreachable("Unknown FP type");
18273 case MVT::f16: // We don't do this for these yet.
18274 case MVT::f80:
18275 case MVT::f128:
18276 case MVT::ppcf128:
18277 return SDValue();
18278 case MVT::f32:
18279 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18280 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18282 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18283 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18284 MVT::i32);
18285 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18288 return SDValue();
18289 case MVT::f64:
18290 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18291 ST->isSimple()) ||
18292 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18294 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18295 getZExtValue(), SDLoc(CFP), MVT::i64);
18296 return DAG.getStore(Chain, DL, Tmp,
18297 Ptr, ST->getMemOperand());
18300 if (ST->isSimple() &&
18301 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18302 // Many FP stores are not made apparent until after legalize, e.g. for
18303 // argument passing. Since this is so common, custom legalize the
18304 // 64-bit integer store into two 32-bit stores.
18305 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18306 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18307 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18308 if (DAG.getDataLayout().isBigEndian())
18309 std::swap(Lo, Hi);
18311 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18312 AAMDNodes AAInfo = ST->getAAInfo();
18314 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18315 ST->getOriginalAlign(), MMOFlags, AAInfo);
18316 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18317 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18318 ST->getPointerInfo().getWithOffset(4),
18319 ST->getOriginalAlign(), MMOFlags, AAInfo);
18320 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18321 St0, St1);
18324 return SDValue();
18328 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18329 StoreSDNode *ST = cast<StoreSDNode>(N);
18330 SDValue Chain = ST->getChain();
18331 SDValue Value = ST->getValue();
18332 SDValue Ptr = ST->getBasePtr();
18334 // If this is a store of a bit convert, store the input value if the
18335 // resultant store does not need a higher alignment than the original.
18336 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18337 ST->isUnindexed()) {
18338 EVT SVT = Value.getOperand(0).getValueType();
18339 // If the store is volatile, we only want to change the store type if the
18340 // resulting store is legal. Otherwise we might increase the number of
18341 // memory accesses. We don't care if the original type was legal or not
18342 // as we assume software couldn't rely on the number of accesses of an
18343 // illegal type.
18344 // TODO: May be able to relax for unordered atomics (see D66309)
18345 if (((!LegalOperations && ST->isSimple()) ||
18346 TLI.isOperationLegal(ISD::STORE, SVT)) &&
18347 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18348 DAG, *ST->getMemOperand())) {
18349 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18350 ST->getMemOperand());
18354 // Turn 'store undef, Ptr' -> nothing.
18355 if (Value.isUndef() && ST->isUnindexed())
18356 return Chain;
18358 // Try to infer better alignment information than the store already has.
18359 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18360 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18361 if (*Alignment > ST->getAlign() &&
18362 isAligned(*Alignment, ST->getSrcValueOffset())) {
18363 SDValue NewStore =
18364 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18365 ST->getMemoryVT(), *Alignment,
18366 ST->getMemOperand()->getFlags(), ST->getAAInfo());
18367 // NewStore will always be N as we are only refining the alignment
18368 assert(NewStore.getNode() == N);
18369 (void)NewStore;
18374 // Try transforming a pair of floating-point load / store ops to integer
18375 // load / store ops.
18376 if (SDValue NewST = TransformFPLoadStorePair(N))
18377 return NewST;
18379 // Try transforming several stores into STORE (BSWAP).
18380 if (SDValue Store = mergeTruncStores(ST))
18381 return Store;
18383 if (ST->isUnindexed()) {
18384 // Walk up chain skipping non-aliasing memory nodes, on this store and any
18385 // adjacent stores.
18386 if (findBetterNeighborChains(ST)) {
18387 // replaceStoreChain uses CombineTo, which handled all of the worklist
18388 // manipulation. Return the original node to not do anything else.
18389 return SDValue(ST, 0);
18391 Chain = ST->getChain();
18394 // FIXME: is there such a thing as a truncating indexed store?
18395 if (ST->isTruncatingStore() && ST->isUnindexed() &&
18396 Value.getValueType().isInteger() &&
18397 (!isa<ConstantSDNode>(Value) ||
18398 !cast<ConstantSDNode>(Value)->isOpaque())) {
18399 APInt TruncDemandedBits =
18400 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18401 ST->getMemoryVT().getScalarSizeInBits());
18403 // See if we can simplify the input to this truncstore with knowledge that
18404 // only the low bits are being used. For example:
18405 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
18406 AddToWorklist(Value.getNode());
18407 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18408 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18409 ST->getMemOperand());
18411 // Otherwise, see if we can simplify the operation with
18412 // SimplifyDemandedBits, which only works if the value has a single use.
18413 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18414 // Re-visit the store if anything changed and the store hasn't been merged
18415 // with another node (N is deleted) SimplifyDemandedBits will add Value's
18416 // node back to the worklist if necessary, but we also need to re-visit
18417 // the Store node itself.
18418 if (N->getOpcode() != ISD::DELETED_NODE)
18419 AddToWorklist(N);
18420 return SDValue(N, 0);
18424 // If this is a load followed by a store to the same location, then the store
18425 // is dead/noop.
18426 // TODO: Can relax for unordered atomics (see D66309)
18427 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18428 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18429 ST->isUnindexed() && ST->isSimple() &&
18430 Ld->getAddressSpace() == ST->getAddressSpace() &&
18431 // There can't be any side effects between the load and store, such as
18432 // a call or store.
18433 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18434 // The store is dead, remove it.
18435 return Chain;
18439 // TODO: Can relax for unordered atomics (see D66309)
18440 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18441 if (ST->isUnindexed() && ST->isSimple() &&
18442 ST1->isUnindexed() && ST1->isSimple()) {
18443 if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18444 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18445 ST->getAddressSpace() == ST1->getAddressSpace()) {
18446 // If this is a store followed by a store with the same value to the
18447 // same location, then the store is dead/noop.
18448 return Chain;
18451 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18452 !ST1->getBasePtr().isUndef() &&
18453 // BaseIndexOffset and the code below requires knowing the size
18454 // of a vector, so bail out if MemoryVT is scalable.
18455 !ST->getMemoryVT().isScalableVector() &&
18456 !ST1->getMemoryVT().isScalableVector() &&
18457 ST->getAddressSpace() == ST1->getAddressSpace()) {
18458 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18459 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18460 unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18461 unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18462 // If the preceding store writes to a subset of the current store's
18463 // location and no other node is chained to that store, we can
18464 // effectively drop that store. Do not remove stores to undef as they may
18465 // be used as data sinks.
18466 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18467 CombineTo(ST1, ST1->getChain());
18468 return SDValue();
18474 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18475 // truncating store. We can do this even if this is already a truncstore.
18476 if ((Value.getOpcode() == ISD::FP_ROUND ||
18477 Value.getOpcode() == ISD::TRUNCATE) &&
18478 Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18479 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18480 ST->getMemoryVT(), LegalOperations)) {
18481 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18482 Ptr, ST->getMemoryVT(), ST->getMemOperand());
18485 // Always perform this optimization before types are legal. If the target
18486 // prefers, also try this after legalization to catch stores that were created
18487 // by intrinsics or other nodes.
18488 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18489 while (true) {
18490 // There can be multiple store sequences on the same chain.
18491 // Keep trying to merge store sequences until we are unable to do so
18492 // or until we merge the last store on the chain.
18493 bool Changed = mergeConsecutiveStores(ST);
18494 if (!Changed) break;
18495 // Return N as merge only uses CombineTo and no worklist clean
18496 // up is necessary.
18497 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18498 return SDValue(N, 0);
18502 // Try transforming N to an indexed store.
18503 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18504 return SDValue(N, 0);
18506 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18508 // Make sure to do this only after attempting to merge stores in order to
18509 // avoid changing the types of some subset of stores due to visit order,
18510 // preventing their merging.
18511 if (isa<ConstantFPSDNode>(ST->getValue())) {
18512 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18513 return NewSt;
18516 if (SDValue NewSt = splitMergedValStore(ST))
18517 return NewSt;
18519 return ReduceLoadOpStoreWidth(N);
18522 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18523 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18524 if (!LifetimeEnd->hasOffset())
18525 return SDValue();
18527 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18528 LifetimeEnd->getOffset(), false);
18530 // We walk up the chains to find stores.
18531 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18532 while (!Chains.empty()) {
18533 SDValue Chain = Chains.pop_back_val();
18534 if (!Chain.hasOneUse())
18535 continue;
18536 switch (Chain.getOpcode()) {
18537 case ISD::TokenFactor:
18538 for (unsigned Nops = Chain.getNumOperands(); Nops;)
18539 Chains.push_back(Chain.getOperand(--Nops));
18540 break;
18541 case ISD::LIFETIME_START:
18542 case ISD::LIFETIME_END:
18543 // We can forward past any lifetime start/end that can be proven not to
18544 // alias the node.
18545 if (!mayAlias(Chain.getNode(), N))
18546 Chains.push_back(Chain.getOperand(0));
18547 break;
18548 case ISD::STORE: {
18549 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18550 // TODO: Can relax for unordered atomics (see D66309)
18551 if (!ST->isSimple() || ST->isIndexed())
18552 continue;
18553 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18554 // The bounds of a scalable store are not known until runtime, so this
18555 // store cannot be elided.
18556 if (StoreSize.isScalable())
18557 continue;
18558 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18559 // If we store purely within object bounds just before its lifetime ends,
18560 // we can remove the store.
18561 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18562 StoreSize.getFixedSize() * 8)) {
18563 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18564 dbgs() << "\nwithin LIFETIME_END of : ";
18565 LifetimeEndBase.dump(); dbgs() << "\n");
18566 CombineTo(ST, ST->getChain());
18567 return SDValue(N, 0);
18572 return SDValue();
18575 /// For the store instruction sequence below, the F and I values
18576 /// are bundled together as an i64 value before being stored into memory.
18577 /// Sometimes it is more efficient to generate separate stores for F and I,
18578 /// which can remove the bitwise instructions or sink them to colder places.
18580 /// (store (or (zext (bitcast F to i32) to i64),
18581 /// (shl (zext I to i64), 32)), addr) -->
18582 /// (store F, addr) and (store I, addr+4)
18584 /// Similarly, splitting other merged stores can also be beneficial, for example:
18585 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18586 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18587 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18588 /// For pair of {i16, i8}, i32 store --> two i16 stores.
18589 /// For pair of {i8, i8}, i16 store --> two i8 stores.
18591 /// We allow each target to determine specifically which kind of splitting is
18592 /// supported.
18594 /// These store patterns are commonly seen from the simple code snippet below
18595 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
18596 /// void goo(const std::pair<int, float> &);
18597 /// hoo() {
18598 /// ...
18599 /// goo(std::make_pair(tmp, ftmp));
18600 /// ...
18601 /// }
18603 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18604 if (OptLevel == CodeGenOpt::None)
18605 return SDValue();
18607 // Can't change the number of memory accesses for a volatile store or break
18608 // atomicity for an atomic one.
18609 if (!ST->isSimple())
18610 return SDValue();
18612 SDValue Val = ST->getValue();
18613 SDLoc DL(ST);
18615 // Match OR operand.
18616 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18617 return SDValue();
18620 // Match the SHL operand and get the low and high parts of Val.
18620 SDValue Op1 = Val.getOperand(0);
18621 SDValue Op2 = Val.getOperand(1);
18622 SDValue Lo, Hi;
18623 if (Op1.getOpcode() != ISD::SHL) {
18624 std::swap(Op1, Op2);
18625 if (Op1.getOpcode() != ISD::SHL)
18626 return SDValue();
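// Op1 is now the SHL: its source becomes the high half and Op2 the low half
// of the stored value.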
18628 Lo = Op2;
18629 Hi = Op1.getOperand(0);
18630 if (!Op1.hasOneUse())
18631 return SDValue();
18633 // Match shift amount to HalfValBitSize.
18634 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18635 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18636 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18637 return SDValue();
18639 // Lo and Hi must be zero-extended from integer types no wider than
18640 // half of Val's width.
18641 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18642 !Lo.getOperand(0).getValueType().isScalarInteger() ||
18643 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18644 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18645 !Hi.getOperand(0).getValueType().isScalarInteger() ||
18646 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18647 return SDValue();
18649 // Use the EVTs of the low and high parts before any bitcast as the
18650 // input to the target query.
18651 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18652 ? Lo.getOperand(0).getValueType()
18653 : Lo.getValueType();
18654 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18655 ? Hi.getOperand(0).getValueType()
18656 : Hi.getValueType();
18657 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18658 return SDValue();
18660 // Start to split store.
18661 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18662 AAMDNodes AAInfo = ST->getAAInfo();
18664 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18665 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18666 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18667 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18669 SDValue Chain = ST->getChain();
18670 SDValue Ptr = ST->getBasePtr();
18671 // Lower value store.
18672 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18673 ST->getOriginalAlign(), MMOFlags, AAInfo);
18674 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18675 // Higher value store.
18676 SDValue St1 = DAG.getStore(
18677 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18678 ST->getOriginalAlign(), MMOFlags, AAInfo);
18679 return St1;
18682 /// Convert a disguised subvector insertion into a shuffle:
18683 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18684 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18685 "Expected extract_vector_elt");
18686 SDValue InsertVal = N->getOperand(1);
18687 SDValue Vec = N->getOperand(0);
18689 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18690 // InsIndex)
18691 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
18692 // CONCAT_VECTORS.
18693 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18694 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18695 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18696 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18697 ArrayRef<int> Mask = SVN->getMask();
18699 SDValue X = Vec.getOperand(0);
18700 SDValue Y = Vec.getOperand(1);
18702 // Vec's operand 0 is using indices from 0 to N-1 and
18703 // operand 1 from N to 2N - 1, where N is the number of
18704 // elements in the vectors.
18705 SDValue InsertVal0 = InsertVal.getOperand(0);
18706 int ElementOffset = -1;
18708 // We explore the inputs of the shuffle in order to see if we find the
18709 // source of the extract_vector_elt. If so, we can use it to modify the
18710 // shuffle rather than perform an insert_vector_elt.
18711 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18712 ArgWorkList.emplace_back(Mask.size(), Y);
18713 ArgWorkList.emplace_back(0, X);
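// X is pushed last so it is visited first; shuffle mask indices 0..N-1 refer
// to elements of X and N..2N-1 to elements of Y.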
18715 while (!ArgWorkList.empty()) {
18716 int ArgOffset;
18717 SDValue ArgVal;
18718 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18720 if (ArgVal == InsertVal0) {
18721 ElementOffset = ArgOffset;
18722 break;
18725 // Peek through concat_vectors.
18726 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18727 int CurrentArgOffset =
18728 ArgOffset + ArgVal.getValueType().getVectorNumElements();
18729 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18730 for (SDValue Op : reverse(ArgVal->ops())) {
18731 CurrentArgOffset -= Step;
18732 ArgWorkList.emplace_back(CurrentArgOffset, Op);
18735 // Make sure we went through all the elements and did not screw up index
18736 // computation.
18737 assert(CurrentArgOffset == ArgOffset);
18741 if (ElementOffset != -1) {
18742 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18744 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18745 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18746 assert(NewMask[InsIndex] <
18747 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18748 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18750 SDValue LegalShuffle =
18751 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18752 Y, NewMask, DAG);
18753 if (LegalShuffle)
18754 return LegalShuffle;
18758 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18759 // bitcast(shuffle (bitcast V), (extended X), Mask)
18760 // Note: We do not use an insert_subvector node because that requires a
18761 // legal subvector type.
18762 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18763 !InsertVal.getOperand(0).getValueType().isVector())
18764 return SDValue();
18766 SDValue SubVec = InsertVal.getOperand(0);
18767 SDValue DestVec = N->getOperand(0);
18768 EVT SubVecVT = SubVec.getValueType();
18769 EVT VT = DestVec.getValueType();
18770 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18771 // If the source only has a single vector element, the cost of creating the
18772 // wide vector is likely to exceed the cost of an insert_vector_elt.
18773 if (NumSrcElts == 1)
18774 return SDValue();
18775 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18776 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18778 // Step 1: Create a shuffle mask that implements this insert operation. The
18779 // vector that we are inserting into will be operand 0 of the shuffle, so
18780 // those elements are just 'i'. The inserted subvector is in the first
18781 // positions of operand 1 of the shuffle. Example:
18782 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18783 SmallVector<int, 16> Mask(NumMaskVals);
18784 for (unsigned i = 0; i != NumMaskVals; ++i) {
18785 if (i / NumSrcElts == InsIndex)
18786 Mask[i] = (i % NumSrcElts) + NumMaskVals;
18787 else
18788 Mask[i] = i;
18791 // Bail out if the target cannot handle the shuffle we want to create.
18792 EVT SubVecEltVT = SubVecVT.getVectorElementType();
18793 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18794 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18795 return SDValue();
18797 // Step 2: Create a wide vector from the inserted source vector by appending
18798 // undefined elements. This is the same size as our destination vector.
18799 SDLoc DL(N);
18800 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18801 ConcatOps[0] = SubVec;
18802 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18804 // Step 3: Shuffle in the padded subvector.
18805 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18806 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18807 AddToWorklist(PaddedSubV.getNode());
18808 AddToWorklist(DestVecBC.getNode());
18809 AddToWorklist(Shuf.getNode());
18810 return DAG.getBitcast(VT, Shuf);
18813 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18814 SDValue InVec = N->getOperand(0);
18815 SDValue InVal = N->getOperand(1);
18816 SDValue EltNo = N->getOperand(2);
18817 SDLoc DL(N);
18819 EVT VT = InVec.getValueType();
18820 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18822 // Inserting into an out-of-bounds element is undefined.
18823 if (IndexC && VT.isFixedLengthVector() &&
18824 IndexC->getZExtValue() >= VT.getVectorNumElements())
18825 return DAG.getUNDEF(VT);
18827 // Remove redundant insertions:
18828 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18829 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18830 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18831 return InVec;
18833 if (!IndexC) {
18834 // If this is a variable insert into an undef vector, it might be better to splat:
18835 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18836 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18837 if (VT.isScalableVector())
18838 return DAG.getSplatVector(VT, DL, InVal);
18839 else {
18840 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18841 return DAG.getBuildVector(VT, DL, Ops);
18844 return SDValue();
18847 if (VT.isScalableVector())
18848 return SDValue();
18850 unsigned NumElts = VT.getVectorNumElements();
18852 // We must know which element is being inserted for folds below here.
18853 unsigned Elt = IndexC->getZExtValue();
18854 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18855 return Shuf;
18857 // Canonicalize insert_vector_elt dag nodes.
18858 // Example:
18859 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18860 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18862 // Do this only if the child insert_vector_elt node has one use; also
18863 // do this only if indices are both constants and Idx1 < Idx0.
18864 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18865 && isa<ConstantSDNode>(InVec.getOperand(2))) {
18866 unsigned OtherElt = InVec.getConstantOperandVal(2);
18867 if (Elt < OtherElt) {
18868 // Swap nodes.
18869 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18870 InVec.getOperand(0), InVal, EltNo);
18871 AddToWorklist(NewOp.getNode());
18872 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18873 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18877 // If we can't generate a legal BUILD_VECTOR, exit
18878 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18879 return SDValue();
18881 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18882 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
18883 // vector elements.
18884 SmallVector<SDValue, 8> Ops;
18885 // Do not combine these two vectors if the output vector will not replace
18886 // the input vector.
18887 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18888 Ops.append(InVec.getNode()->op_begin(),
18889 InVec.getNode()->op_end());
18890 } else if (InVec.isUndef()) {
18891 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18892 } else {
18893 return SDValue();
18895 assert(Ops.size() == NumElts && "Unexpected vector size");
18897 // Insert the element
18898 if (Elt < Ops.size()) {
18899 // All the operands of BUILD_VECTOR must have the same type;
18900 // we enforce that here.
18901 EVT OpVT = Ops[0].getValueType();
18902 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18905 // Return the new vector
18906 return DAG.getBuildVector(VT, DL, Ops);
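// Replace an extract_vector_elt of a loaded vector with a narrow scalar load
// of just the requested element.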
18909 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18910 SDValue EltNo,
18911 LoadSDNode *OriginalLoad) {
18912 assert(OriginalLoad->isSimple());
18914 EVT ResultVT = EVE->getValueType(0);
18915 EVT VecEltVT = InVecVT.getVectorElementType();
18917 // If the vector element type is not a whole number of bytes then we are
18918 // unable to correctly compute an address to load only the extracted element
18919 // as a scalar.
18920 if (!VecEltVT.isByteSized())
18921 return SDValue();
18923 ISD::LoadExtType ExtTy =
18924 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
18925 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
18926 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18927 return SDValue();
18929 Align Alignment = OriginalLoad->getAlign();
18930 MachinePointerInfo MPI;
18931 SDLoc DL(EVE);
18932 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18933 int Elt = ConstEltNo->getZExtValue();
18934 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18935 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18936 Alignment = commonAlignment(Alignment, PtrOff);
18937 } else {
18938 // Discard the pointer info except the address space because the memory
18939 // operand can't represent this new access since the offset is variable.
18940 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18941 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
18944 bool IsFast = false;
18945 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
18946 OriginalLoad->getAddressSpace(), Alignment,
18947 OriginalLoad->getMemOperand()->getFlags(),
18948 &IsFast) ||
18949 !IsFast)
18950 return SDValue();
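// Compute the address of the element to load from the vector's base pointer
// and the (possibly variable) element index.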
18952 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18953 InVecVT, EltNo);
18955 // The replacement we need to do here is a little tricky: we need to
18956 // replace an extractelement of a load with a load.
18957 // Use ReplaceAllUsesOfValuesWith to do the replacement.
18958 // Note that this replacement assumes that the extractelement is the only
18959 // use of the load; that's okay because we don't want to perform this
18960 // transformation in other cases anyway.
18961 SDValue Load;
18962 SDValue Chain;
18963 if (ResultVT.bitsGT(VecEltVT)) {
18964 // If the result type of vextract is wider than the load, then issue an
18965 // extending load instead.
18966 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18967 VecEltVT)
18968 ? ISD::ZEXTLOAD
18969 : ISD::EXTLOAD;
18970 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18971 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18972 Alignment, OriginalLoad->getMemOperand()->getFlags(),
18973 OriginalLoad->getAAInfo());
18974 Chain = Load.getValue(1);
18975 } else {
18976 Load = DAG.getLoad(
18977 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18978 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18979 Chain = Load.getValue(1);
18980 if (ResultVT.bitsLT(VecEltVT))
18981 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18982 else
18983 Load = DAG.getBitcast(ResultVT, Load);
18985 WorklistRemover DeadNodes(*this);
18986 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18987 SDValue To[] = { Load, Chain };
18988 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18989 // Make sure to revisit this node to clean it up; it will usually be dead.
18990 AddToWorklist(EVE);
18991 // Since we're explicitly calling ReplaceAllUses, add the new node to the
18992 // worklist explicitly as well.
18993 AddToWorklistWithUsers(Load.getNode());
18994 ++OpsNarrowed;
18995 return SDValue(EVE, 0);
18998 /// Transform a vector binary operation into a scalar binary operation by moving
18999 /// the math/logic after an extract element of a vector.
19000 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19001 bool LegalOperations) {
19002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19003 SDValue Vec = ExtElt->getOperand(0);
19004 SDValue Index = ExtElt->getOperand(1);
19005 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19006 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19007 Vec.getNode()->getNumValues() != 1)
19008 return SDValue();
19010 // Targets may want to avoid this to prevent an expensive register transfer.
19011 if (!TLI.shouldScalarizeBinop(Vec))
19012 return SDValue();
19014 // Extracting an element of a vector constant is constant-folded, so this
19015 // transform is just replacing a vector op with a scalar op while moving the
19016 // extract.
19017 SDValue Op0 = Vec.getOperand(0);
19018 SDValue Op1 = Vec.getOperand(1);
19019 if (isAnyConstantBuildVector(Op0, true) ||
19020 isAnyConstantBuildVector(Op1, true)) {
19021 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19022 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19023 SDLoc DL(ExtElt);
19024 EVT VT = ExtElt->getValueType(0);
19025 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19026 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19027 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19030 return SDValue();
19033 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19034 SDValue VecOp = N->getOperand(0);
19035 SDValue Index = N->getOperand(1);
19036 EVT ScalarVT = N->getValueType(0);
19037 EVT VecVT = VecOp.getValueType();
19038 if (VecOp.isUndef())
19039 return DAG.getUNDEF(ScalarVT);
19041 // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19043 // This only really matters if the index is non-constant since other combines
19044 // on the constant elements already work.
19045 SDLoc DL(N);
19046 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19047 Index == VecOp.getOperand(2)) {
19048 SDValue Elt = VecOp.getOperand(1);
19049 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19052 // (vextract (scalar_to_vector val), 0) -> val
19053 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19054 // Only 0'th element of SCALAR_TO_VECTOR is defined.
19055 if (DAG.isKnownNeverZero(Index))
19056 return DAG.getUNDEF(ScalarVT);
19058 // Check if the result type doesn't match the inserted element type. A
19059 // SCALAR_TO_VECTOR may truncate the inserted element and the
19060 // EXTRACT_VECTOR_ELT may widen the extracted vector.
19061 SDValue InOp = VecOp.getOperand(0);
19062 if (InOp.getValueType() != ScalarVT) {
19063 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19064 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19066 return InOp;
19069 // extract_vector_elt of out-of-bounds element -> UNDEF
19070 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19071 if (IndexC && VecVT.isFixedLengthVector() &&
19072 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19073 return DAG.getUNDEF(ScalarVT);
19075 // extract_vector_elt (build_vector x, y), 1 -> y
19076 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19077 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19078 TLI.isTypeLegal(VecVT) &&
19079 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19080 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19081 VecVT.isFixedLengthVector()) &&
19082 "BUILD_VECTOR used for scalable vectors");
19083 unsigned IndexVal =
19084 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19085 SDValue Elt = VecOp.getOperand(IndexVal);
19086 EVT InEltVT = Elt.getValueType();
19088 // Sometimes build_vector's scalar input types do not match result type.
19089 if (ScalarVT == InEltVT)
19090 return Elt;
19092 // TODO: It may be useful to truncate, if it is free, when the build_vector
19093 // implicitly converts.
19096 if (VecVT.isScalableVector())
19097 return SDValue();
19099 // All the code from this point onwards assumes fixed width vectors, but it's
19100 // possible that some of the combinations could be made to work for scalable
19101 // vectors too.
19102 unsigned NumElts = VecVT.getVectorNumElements();
19103 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19105 // TODO: These transforms should not require the 'hasOneUse' restriction, but
19106 // there are regressions on multiple targets without it. We can end up with a
19107 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19108 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19109 VecOp.hasOneUse()) {
19110 // The vector index of the LSBs of the source depends on the endianness.
19111 bool IsLE = DAG.getDataLayout().isLittleEndian();
19112 unsigned ExtractIndex = IndexC->getZExtValue();
19113 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19114 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19115 SDValue BCSrc = VecOp.getOperand(0);
19116 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19117 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19119 if (LegalTypes && BCSrc.getValueType().isInteger() &&
19120 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19121 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19122 // trunc i64 X to i32
19123 SDValue X = BCSrc.getOperand(0);
19124 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19125 "Extract element and scalar to vector can't change element type "
19126 "from FP to integer.");
19127 unsigned XBitWidth = X.getValueSizeInBits();
19128 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19130 // An extract element return value type can be wider than its vector
19131 // operand element type. In that case, the high bits are undefined, so
19132 // it's possible that we may need to extend rather than truncate.
19133 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19134 assert(XBitWidth % VecEltBitWidth == 0 &&
19135 "Scalar bitwidth must be a multiple of vector element bitwidth");
19136 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19141 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19142 return BO;
19144 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
19145 // We only perform this optimization before the op legalization phase because
19146 // we may introduce new vector instructions which are not backed by TD
19147 // patterns. For example on AVX, extracting elements from a wide vector
19148 // without using extract_subvector. However, if we can find an underlying
19149 // scalar value, then we can always use that.
19150 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19151 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19152 // Find the new index to extract from.
19153 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19155 // Extracting an undef index is undef.
19156 if (OrigElt == -1)
19157 return DAG.getUNDEF(ScalarVT);
19159 // Select the right vector half to extract from.
19160 SDValue SVInVec;
19161 if (OrigElt < (int)NumElts) {
19162 SVInVec = VecOp.getOperand(0);
19163 } else {
19164 SVInVec = VecOp.getOperand(1);
19165 OrigElt -= NumElts;
19168 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19169 SDValue InOp = SVInVec.getOperand(OrigElt);
19170 if (InOp.getValueType() != ScalarVT) {
19171 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19172 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19175 return InOp;
19178 // FIXME: We should handle recursing on other vector shuffles and
19179 // scalar_to_vector here as well.
19181 if (!LegalOperations ||
19182 // FIXME: Should really be just isOperationLegalOrCustom.
19183 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19184 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19185 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19186 DAG.getVectorIdxConstant(OrigElt, DL));
19190 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19191 // simplify it based on the (valid) extraction indices.
19192 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19193 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19194 Use->getOperand(0) == VecOp &&
19195 isa<ConstantSDNode>(Use->getOperand(1));
19196 })) {
19197 APInt DemandedElts = APInt::getZero(NumElts);
19198 for (SDNode *Use : VecOp->uses()) {
19199 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19200 if (CstElt->getAPIntValue().ult(NumElts))
19201 DemandedElts.setBit(CstElt->getZExtValue());
19203 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19204 // We simplified the vector operand of this extract element. If this
19205 // extract is not dead, visit it again so it is folded properly.
19206 if (N->getOpcode() != ISD::DELETED_NODE)
19207 AddToWorklist(N);
19208 return SDValue(N, 0);
19210 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19211 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19212 // We simplified the vector operand of this extract element. If this
19213 // extract is not dead, visit it again so it is folded properly.
19214 if (N->getOpcode() != ISD::DELETED_NODE)
19215 AddToWorklist(N);
19216 return SDValue(N, 0);
19220 // Everything under here is trying to match an extract of a loaded value.
19221 // If the result of the load has to be truncated, then it's not necessarily
19222 // profitable.
19223 bool BCNumEltsChanged = false;
19224 EVT ExtVT = VecVT.getVectorElementType();
19225 EVT LVT = ExtVT;
19226 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19227 return SDValue();
19229 if (VecOp.getOpcode() == ISD::BITCAST) {
19230 // Don't duplicate a load with other uses.
19231 if (!VecOp.hasOneUse())
19232 return SDValue();
19234 EVT BCVT = VecOp.getOperand(0).getValueType();
19235 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19236 return SDValue();
19237 if (NumElts != BCVT.getVectorNumElements())
19238 BCNumEltsChanged = true;
19239 VecOp = VecOp.getOperand(0);
19240 ExtVT = BCVT.getVectorElementType();
19243 // extract (vector load $addr), i --> load $addr + i * size
19244 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19245 ISD::isNormalLoad(VecOp.getNode()) &&
19246 !Index->hasPredecessor(VecOp.getNode())) {
19247 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19248 if (VecLoad && VecLoad->isSimple())
19249 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19252 // Perform only after legalization to ensure build_vector / vector_shuffle
19253 // optimizations have already been done.
19254 if (!LegalOperations || !IndexC)
19255 return SDValue();
19257 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19258 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19259 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19260 int Elt = IndexC->getZExtValue();
19261 LoadSDNode *LN0 = nullptr;
19262 if (ISD::isNormalLoad(VecOp.getNode())) {
19263 LN0 = cast<LoadSDNode>(VecOp);
19264 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19265 VecOp.getOperand(0).getValueType() == ExtVT &&
19266 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19267 // Don't duplicate a load with other uses.
19268 if (!VecOp.hasOneUse())
19269 return SDValue();
19271 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19273 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19274 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19275 // =>
19276 // (load $addr+1*size)
19278 // Don't duplicate a load with other uses.
19279 if (!VecOp.hasOneUse())
19280 return SDValue();
19282 // If the bit convert changed the number of elements, it is unsafe
19283 // to examine the mask.
19284 if (BCNumEltsChanged)
19285 return SDValue();
19287 // Select the input vector, guarding against an out-of-range extract index.
19288 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19289 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19291 if (VecOp.getOpcode() == ISD::BITCAST) {
19292 // Don't duplicate a load with other uses.
19293 if (!VecOp.hasOneUse())
19294 return SDValue();
19296 VecOp = VecOp.getOperand(0);
19298 if (ISD::isNormalLoad(VecOp.getNode())) {
19299 LN0 = cast<LoadSDNode>(VecOp);
19300 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19301 Index = DAG.getConstant(Elt, DL, Index.getValueType());
19303 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19304 VecVT.getVectorElementType() == ScalarVT &&
19305 (!LegalTypes ||
19306 TLI.isTypeLegal(
19307 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19308 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19309 // -> extract_vector_elt a, 0
19310 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19311 // -> extract_vector_elt a, 1
19312 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19313 // -> extract_vector_elt b, 0
19314 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19315 // -> extract_vector_elt b, 1
19316 SDLoc SL(N);
19317 EVT ConcatVT = VecOp.getOperand(0).getValueType();
19318 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19319 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19320 Index.getValueType());
19322 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19323 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19324 ConcatVT.getVectorElementType(),
19325 ConcatOp, NewIdx);
19326 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19329 // Make sure we found a non-volatile load and the extractelement is
19330 // the only use.
19331 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19332 return SDValue();
19334 // If Idx was -1 above, Elt is going to be -1, so just return undef.
19335 if (Elt == -1)
19336 return DAG.getUNDEF(LVT);
19338 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19341 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19342 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19343 // We perform this optimization post type-legalization because
19344 // the type-legalizer often scalarizes integer-promoted vectors.
19345 // Performing this optimization earlier may create bit-casts which
19346 // will be type-legalized to complex code sequences.
19347 // We perform this optimization only before the operation legalizer because we
19348 // may introduce illegal operations.
19349 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19350 return SDValue();
19352 unsigned NumInScalars = N->getNumOperands();
19353 SDLoc DL(N);
19354 EVT VT = N->getValueType(0);
19356 // Check to see if this is a BUILD_VECTOR of a bunch of values
19357 // which come from any_extend or zero_extend nodes. If so, we can create
19358 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19359 // optimizations. We do not handle sign-extend because we can't fill the sign
19360 // bits using shuffles.
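// Illustrative example (editor's sketch; little-endian, i16 zero-extended to
// i32, types chosen only for illustration):
//   (v4i32 build_vector (zext a), (zext b), (zext c), (zext d))
// is rebuilt as
//   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))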
19361 EVT SourceType = MVT::Other;
19362 bool AllAnyExt = true;
19364 for (unsigned i = 0; i != NumInScalars; ++i) {
19365 SDValue In = N->getOperand(i);
19366 // Ignore undef inputs.
19367 if (In.isUndef()) continue;
19369 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
19370 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19372 // Abort if the element is not an extension.
19373 if (!ZeroExt && !AnyExt) {
19374 SourceType = MVT::Other;
19375 break;
19378 // The input is a ZeroExt or AnyExt. Check the original type.
19379 EVT InTy = In.getOperand(0).getValueType();
19381 // Check that all of the widened source types are the same.
19382 if (SourceType == MVT::Other)
19383 // First time.
19384 SourceType = InTy;
19385 else if (InTy != SourceType) {
19386 // Multiple incoming types. Abort.
19387 SourceType = MVT::Other;
19388 break;
19391 // Check if all of the extends are ANY_EXTENDs.
19392 AllAnyExt &= AnyExt;
19395 // In order to have valid types, all of the inputs must be extended from the
19396 // same source type and all of the inputs must be any or zero extend.
19397 // Scalar sizes must be a power of two.
19398 EVT OutScalarTy = VT.getScalarType();
19399 bool ValidTypes = SourceType != MVT::Other &&
19400 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19401 isPowerOf2_32(SourceType.getSizeInBits());
19403 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19404 // turn into a single shuffle instruction.
19405 if (!ValidTypes)
19406 return SDValue();
19408 // If we already have a splat buildvector, then don't fold it if it means
19409 // introducing zeros.
19410 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19411 return SDValue();
19413 bool isLE = DAG.getDataLayout().isLittleEndian();
19414 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19415 assert(ElemRatio > 1 && "Invalid element size ratio");
19416 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19417 DAG.getConstant(0, DL, SourceType);
19419 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19420 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19422 // Populate the new build_vector
19423 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19424 SDValue Cast = N->getOperand(i);
19425 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19426 Cast.getOpcode() == ISD::ZERO_EXTEND ||
19427 Cast.isUndef()) && "Invalid cast opcode");
19428 SDValue In;
19429 if (Cast.isUndef())
19430 In = DAG.getUNDEF(SourceType);
19431 else
19432 In = Cast->getOperand(0);
19433 unsigned Index = isLE ? (i * ElemRatio) :
19434 (i * ElemRatio + (ElemRatio - 1));
19436 assert(Index < Ops.size() && "Invalid index");
19437 Ops[Index] = In;
19440 // The type of the new BUILD_VECTOR node.
19441 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19442 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19443 "Invalid vector size");
19444 // Check if the new vector type is legal.
19445 if (!isTypeLegal(VecVT) ||
19446 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19447 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19448 return SDValue();
19450 // Make the new BUILD_VECTOR.
19451 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19453 // The new BUILD_VECTOR node has the potential to be further optimized.
19454 AddToWorklist(BV.getNode());
19455 // Bitcast to the desired type.
19456 return DAG.getBitcast(VT, BV);
19459 // Simplify (build_vec (trunc $1)
19460 // (trunc (srl $1 half-width))
19461 // (trunc (srl $1 (2 * half-width))) …)
19462 // to (bitcast $1)
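// Concretely (editor's sketch; $1 : i64, little-endian):
//   (v4i16 build_vector (trunc $1), (trunc (srl $1, 16)),
//                       (trunc (srl $1, 32)), (trunc (srl $1, 48)))
// becomes (v4i16 bitcast $1).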
19463 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19464 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19466 // Only for little-endian targets
19467 if (!DAG.getDataLayout().isLittleEndian())
19468 return SDValue();
19470 SDLoc DL(N);
19471 EVT VT = N->getValueType(0);
19472 EVT OutScalarTy = VT.getScalarType();
19473 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19475 // Only handle power-of-two scalar types to be sure that the bitcast works well
19476 if (!isPowerOf2_64(ScalarTypeBitsize))
19477 return SDValue();
19479 unsigned NumInScalars = N->getNumOperands();
19481 // Look through bitcasts
19482 auto PeekThroughBitcast = [](SDValue Op) {
19483 if (Op.getOpcode() == ISD::BITCAST)
19484 return Op.getOperand(0);
19485 return Op;
19488 // The source value where all the parts are extracted.
19489 SDValue Src;
19490 for (unsigned i = 0; i != NumInScalars; ++i) {
19491 SDValue In = PeekThroughBitcast(N->getOperand(i));
19492 // Ignore undef inputs.
19493 if (In.isUndef()) continue;
19495 if (In.getOpcode() != ISD::TRUNCATE)
19496 return SDValue();
19498 In = PeekThroughBitcast(In.getOperand(0));
19500 if (In.getOpcode() != ISD::SRL) {
19501 // For now, handle only build_vec without shuffling; shifts may be handled
19502 // here in the future.
19503 if (i != 0)
19504 return SDValue();
19506 Src = In;
19507 } else {
19508 // In is SRL
19509 SDValue part = PeekThroughBitcast(In.getOperand(0));
19511 if (!Src) {
19512 Src = part;
19513 } else if (Src != part) {
19514 // Vector parts do not stem from the same variable
19515 return SDValue();
19518 SDValue ShiftAmtVal = In.getOperand(1);
19519 if (!isa<ConstantSDNode>(ShiftAmtVal))
19520 return SDValue();
19522 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19524 // The extracted value is not extracted at the right position
19525 if (ShiftAmt != i * ScalarTypeBitsize)
19526 return SDValue();
19530 // Only cast if the size is the same
19531 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19532 return SDValue();
19534 return DAG.getBitcast(VT, Src);
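// Editor's note (sketch of the contract, inferred from the code below and its
// caller): given the BUILD_VECTOR node N and the element-to-source mapping in
// VectorMask, try to form a single VECTOR_SHUFFLE of VecIn1/VecIn2 (the input
// pair selected by LeftIdx), concatenating, splitting or padding the inputs as
// needed so both shuffle operands end up with a matching type.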
19537 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19538 ArrayRef<int> VectorMask,
19539 SDValue VecIn1, SDValue VecIn2,
19540 unsigned LeftIdx, bool DidSplitVec) {
19541 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19543 EVT VT = N->getValueType(0);
19544 EVT InVT1 = VecIn1.getValueType();
19545 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19547 unsigned NumElems = VT.getVectorNumElements();
19548 unsigned ShuffleNumElems = NumElems;
19550 // If we artificially split a vector in two already, then the offsets in the
19551 // operands will all be based off of VecIn1, even those in VecIn2.
19552 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19554 uint64_t VTSize = VT.getFixedSizeInBits();
19555 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19556 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19558 assert(InVT2Size <= InVT1Size &&
19559 "Inputs must be sorted to be in non-increasing vector size order.");
19561 // We can't generate a shuffle node with mismatched input and output types.
19562 // Try to make the types match the type of the output.
19563 if (InVT1 != VT || InVT2 != VT) {
19564 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19565 // If the output vector length is a multiple of both input lengths,
19566 // we can concatenate them and pad the rest with undefs.
19567 unsigned NumConcats = VTSize / InVT1Size;
19568 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19569 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19570 ConcatOps[0] = VecIn1;
19571 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19572 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19573 VecIn2 = SDValue();
19574 } else if (InVT1Size == VTSize * 2) {
19575 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19576 return SDValue();
19578 if (!VecIn2.getNode()) {
19579 // If we only have one input vector, and it's twice the size of the
19580 // output, split it in two.
19581 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19582 DAG.getVectorIdxConstant(NumElems, DL));
19583 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19584 // Since we now have shorter input vectors, adjust the offset of the
19585 // second vector's start.
19586 Vec2Offset = NumElems;
19587 } else {
19588 assert(InVT2Size <= InVT1Size &&
19589 "Second input is not going to be larger than the first one.");
19591 // VecIn1 is wider than the output, and we have another, possibly
19592 // smaller input. Pad the smaller input with undefs, shuffle at the
19593 // input vector width, and extract the output.
19594 // The shuffle type is different than VT, so check legality again.
19595 if (LegalOperations &&
19596 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19597 return SDValue();
19599 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19600 // lower it back into a BUILD_VECTOR. So if the inserted type is
19601 // illegal, don't even try.
19602 if (InVT1 != InVT2) {
19603 if (!TLI.isTypeLegal(InVT2))
19604 return SDValue();
19605 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19606 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19608 ShuffleNumElems = NumElems * 2;
19610 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19611 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19612 ConcatOps[0] = VecIn2;
19613 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19614 } else {
19615 // TODO: Support cases where the length mismatch isn't exactly by a
19616 // factor of 2.
19617 // TODO: Move this check upwards, so that if we have bad type
19618 // mismatches, we don't create any DAG nodes.
19619 return SDValue();
19623 // Initialize mask to undef.
19624 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19626 // Only need to run up to the number of elements actually used, not the
19627 // total number of elements in the shuffle - if we are shuffling a wider
19628 // vector, the high lanes should be set to undef.
19629 for (unsigned i = 0; i != NumElems; ++i) {
19630 if (VectorMask[i] <= 0)
19631 continue;
19633 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19634 if (VectorMask[i] == (int)LeftIdx) {
19635 Mask[i] = ExtIndex;
19636 } else if (VectorMask[i] == (int)LeftIdx + 1) {
19637 Mask[i] = Vec2Offset + ExtIndex;
19641 // The type of the input vectors may have changed above.
19642 InVT1 = VecIn1.getValueType();
19644 // If we already have a VecIn2, it should have the same type as VecIn1.
19645 // If we don't, get an undef/zero vector of the appropriate type.
19646 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19647 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19649 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19650 if (ShuffleNumElems > NumElems)
19651 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19653 return Shuffle;
19656 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19657 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19659 // First, determine where the build vector is not undef.
19660 // TODO: We could extend this to handle zero elements as well as undefs.
19661 int NumBVOps = BV->getNumOperands();
19662 int ZextElt = -1;
19663 for (int i = 0; i != NumBVOps; ++i) {
19664 SDValue Op = BV->getOperand(i);
19665 if (Op.isUndef())
19666 continue;
19667 if (ZextElt == -1)
19668 ZextElt = i;
19669 else
19670 return SDValue();
19672 // Bail out if there's no non-undef element.
19673 if (ZextElt == -1)
19674 return SDValue();
19676 // The build vector contains some number of undef elements and exactly
19677 // one other element. That other element must be a zero-extended scalar
19678 // extracted from a vector at a constant index to turn this into a shuffle.
19679 // Also, require that the build vector does not implicitly truncate/extend
19680 // its elements.
19681 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
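// Illustrative example (editor's sketch; V and the types are only for
// illustration):
//   (v2i64 build_vector undef, (i64 zero_extend (extractelt V:v4i32, 1)))
// can become
//   (v2i64 bitcast (vector_shuffle<u,u,1,4> V, (v4i32 zero)))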
19682 EVT VT = BV->getValueType(0);
19683 SDValue Zext = BV->getOperand(ZextElt);
19684 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19685 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19686 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19687 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19688 return SDValue();
19690 // The zero-extended size must be a multiple of the source size, and we must be
19691 // building a vector of the same size as the source of the extract element.
19692 SDValue Extract = Zext.getOperand(0);
19693 unsigned DestSize = Zext.getValueSizeInBits();
19694 unsigned SrcSize = Extract.getValueSizeInBits();
19695 if (DestSize % SrcSize != 0 ||
19696 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19697 return SDValue();
19699 // Create a shuffle mask that will combine the extracted element with zeros
19700 // and undefs.
19701 int ZextRatio = DestSize / SrcSize;
19702 int NumMaskElts = NumBVOps * ZextRatio;
19703 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19704 for (int i = 0; i != NumMaskElts; ++i) {
19705 if (i / ZextRatio == ZextElt) {
19706 // The low bits of the (potentially translated) extracted element map to
19707 // the source vector. The high bits map to zero. We will use a zero vector
19708 // as the 2nd source operand of the shuffle, so use the 1st element of
19709 // that vector (mask value is number-of-elements) for the high bits.
19710 if (i % ZextRatio == 0)
19711 ShufMask[i] = Extract.getConstantOperandVal(1);
19712 else
19713 ShufMask[i] = NumMaskElts;
19716 // Undef elements of the build vector remain undef because we initialize
19717 // the shuffle mask with -1.
19720 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19721 // bitcast (shuffle V, ZeroVec, VectorMask)
19722 SDLoc DL(BV);
19723 EVT VecVT = Extract.getOperand(0).getValueType();
19724 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19725 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19726 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19727 ZeroVec, ShufMask, DAG);
19728 if (!Shuf)
19729 return SDValue();
19730 return DAG.getBitcast(VT, Shuf);
19733 // FIXME: promote to STLExtras.
19734 template <typename R, typename T>
19735 static auto getFirstIndexOf(R &&Range, const T &Val) {
19736 auto I = find(Range, Val);
19737 if (I == Range.end())
19738 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
19739 return std::distance(Range.begin(), I);
19742 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19743 // operations. If the types of the vectors we're extracting from allow it,
19744 // turn this into a vector_shuffle node.
19745 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19746 SDLoc DL(N);
19747 EVT VT = N->getValueType(0);
19749 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19750 if (!isTypeLegal(VT))
19751 return SDValue();
19753 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19754 return V;
19756 // May only combine to shuffle after legalize if shuffle is legal.
19757 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19758 return SDValue();
19760 bool UsesZeroVector = false;
19761 unsigned NumElems = N->getNumOperands();
19763 // Record, for each element of the newly built vector, which input vector
19764 // that element comes from. -1 stands for undef, 0 for the zero vector,
19765 // and positive values for the input vectors.
19766 // VectorMask maps each element to its vector number, and VecIn maps vector
19767 // numbers to their initial SDValues.
19769 SmallVector<int, 8> VectorMask(NumElems, -1);
19770 SmallVector<SDValue, 8> VecIn;
19771 VecIn.push_back(SDValue());
19773 for (unsigned i = 0; i != NumElems; ++i) {
19774 SDValue Op = N->getOperand(i);
19776 if (Op.isUndef())
19777 continue;
19779 // See if we can use a blend with a zero vector.
19780 // TODO: Should we generalize this to a blend with an arbitrary constant
19781 // vector?
19782 if (isNullConstant(Op) || isNullFPConstant(Op)) {
19783 UsesZeroVector = true;
19784 VectorMask[i] = 0;
19785 continue;
19788 // Not an undef or zero. If the input is something other than an
19789 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19790 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19791 !isa<ConstantSDNode>(Op.getOperand(1)))
19792 return SDValue();
19793 SDValue ExtractedFromVec = Op.getOperand(0);
19795 if (ExtractedFromVec.getValueType().isScalableVector())
19796 return SDValue();
19798 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19799 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19800 return SDValue();
19802 // All inputs must have the same element type as the output.
19803 if (VT.getVectorElementType() !=
19804 ExtractedFromVec.getValueType().getVectorElementType())
19805 return SDValue();
19807 // Have we seen this input vector before?
19808 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19809 // a map back from SDValues to numbers isn't worth it.
19810 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19811 if (Idx == -1) { // A new source vector?
19812 Idx = VecIn.size();
19813 VecIn.push_back(ExtractedFromVec);
19816 VectorMask[i] = Idx;
19819 // If we didn't find at least one input vector, bail out.
19820 if (VecIn.size() < 2)
19821 return SDValue();
19823 // If all the operands of the BUILD_VECTOR extract from the same
19824 // vector, then split the vector efficiently based on the maximum
19825 // vector access index and adjust the VectorMask and
19826 // VecIn accordingly.
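// Illustrative example (editor's sketch, assuming v8i32 is legal on the
// target): a v4i32 build_vector that extracts elements {0, 1, 8, 9} from a
// single v16i32 source has MaxIndex = 9, so NearestPow2 = 16 and
// SplitSize = 8; the source is split into two v8i32 halves and the mask is
// remapped onto them, which is cheaper than shuffling the full v16i32 vector.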
19827 bool DidSplitVec = false;
19828 if (VecIn.size() == 2) {
19829 unsigned MaxIndex = 0;
19830 unsigned NearestPow2 = 0;
19831 SDValue Vec = VecIn.back();
19832 EVT InVT = Vec.getValueType();
19833 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19835 for (unsigned i = 0; i < NumElems; i++) {
19836 if (VectorMask[i] <= 0)
19837 continue;
19838 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19839 IndexVec[i] = Index;
19840 MaxIndex = std::max(MaxIndex, Index);
19843 NearestPow2 = PowerOf2Ceil(MaxIndex);
19844 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19845 NumElems * 2 < NearestPow2) {
19846 unsigned SplitSize = NearestPow2 / 2;
19847 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19848 InVT.getVectorElementType(), SplitSize);
19849 if (TLI.isTypeLegal(SplitVT) &&
19850 SplitSize + SplitVT.getVectorNumElements() <=
19851 InVT.getVectorNumElements()) {
19852 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19853 DAG.getVectorIdxConstant(SplitSize, DL));
19854 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19855 DAG.getVectorIdxConstant(0, DL));
19856 VecIn.pop_back();
19857 VecIn.push_back(VecIn1);
19858 VecIn.push_back(VecIn2);
19859 DidSplitVec = true;
19861 for (unsigned i = 0; i < NumElems; i++) {
19862 if (VectorMask[i] <= 0)
19863 continue;
19864 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19870 // Sort input vectors by decreasing vector element count,
19871 // while preserving the relative order of equally-sized vectors.
19872 // Note that we keep the first "implicit" zero vector as-is.
19873 SmallVector<SDValue, 8> SortedVecIn(VecIn);
19874 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19875 [](const SDValue &a, const SDValue &b) {
19876 return a.getValueType().getVectorNumElements() >
19877 b.getValueType().getVectorNumElements();
19880 // We now also need to rebuild the VectorMask, because it referenced element
19881 // order in VecIn, and we just sorted them.
19882 for (int &SourceVectorIndex : VectorMask) {
19883 if (SourceVectorIndex <= 0)
19884 continue;
19885 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19886 assert(Idx > 0 && Idx < SortedVecIn.size() &&
19887 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19888 SourceVectorIndex = Idx;
19891 VecIn = std::move(SortedVecIn);
19893 // TODO: Should this fire if some of the input vectors have an illegal type (like
19894 // it does now), or should we let legalization run its course first?
19896 // Shuffle phase:
19897 // Take pairs of vectors, and shuffle them so that the result has elements
19898 // from these vectors in the correct places.
19899 // For example, given:
19900 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19901 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19902 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19903 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19904 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19905 // We will generate:
19906 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19907 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19908 SmallVector<SDValue, 4> Shuffles;
19909 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19910 unsigned LeftIdx = 2 * In + 1;
19911 SDValue VecLeft = VecIn[LeftIdx];
19912 SDValue VecRight =
19913 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19915 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19916 VecRight, LeftIdx, DidSplitVec))
19917 Shuffles.push_back(Shuffle);
19918 else
19919 return SDValue();
19922 // If we need the zero vector as an "ingredient" in the blend tree, add it
19923 // to the list of shuffles.
19924 if (UsesZeroVector)
19925 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19926 : DAG.getConstantFP(0.0, DL, VT));
19928 // If we only have one shuffle, we're done.
19929 if (Shuffles.size() == 1)
19930 return Shuffles[0];
19932 // Update the vector mask to point to the post-shuffle vectors.
19933 for (int &Vec : VectorMask)
19934 if (Vec == 0)
19935 Vec = Shuffles.size() - 1;
19936 else
19937 Vec = (Vec - 1) / 2;
19939 // More than one shuffle. Generate a binary tree of blends, e.g. if from
19940 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19941 // generate:
19942 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19943 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19944 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19945 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19946 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19947 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19948 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19950 // Make sure the initial size of the shuffle list is even.
19951 if (Shuffles.size() % 2)
19952 Shuffles.push_back(DAG.getUNDEF(VT));
19954 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19955 if (CurSize % 2) {
19956 Shuffles[CurSize] = DAG.getUNDEF(VT);
19957 CurSize++;
19959 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19960 int Left = 2 * In;
19961 int Right = 2 * In + 1;
19962 SmallVector<int, 8> Mask(NumElems, -1);
19963 for (unsigned i = 0; i != NumElems; ++i) {
19964 if (VectorMask[i] == Left) {
19965 Mask[i] = i;
19966 VectorMask[i] = In;
19967 } else if (VectorMask[i] == Right) {
19968 Mask[i] = i + NumElems;
19969 VectorMask[i] = In;
19973 Shuffles[In] =
19974 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19977 return Shuffles[0];
19980 // Try to turn a build vector of zero extends of extract vector elts into a
19981 // vector zero extend and possibly an extract subvector.
19982 // TODO: Support sign extend?
19983 // TODO: Allow undef elements?
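// Illustrative example (editor's sketch; X and the types are only for
// illustration):
//   (v2i32 build_vector (zext (extractelt X:v4i16, 2)),
//                       (zext (extractelt X:v4i16, 3)))
//   --> (v2i32 zero_extend (v2i16 extract_subvector X, 2))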
19984 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19985 if (LegalOperations)
19986 return SDValue();
19988 EVT VT = N->getValueType(0);
19990 bool FoundZeroExtend = false;
19991 SDValue Op0 = N->getOperand(0);
19992 auto checkElem = [&](SDValue Op) -> int64_t {
19993 unsigned Opc = Op.getOpcode();
19994 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19995 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19996 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19997 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19998 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19999 return C->getZExtValue();
20000 return -1;
20003 // Make sure the first element matches
20004 // (zext (extract_vector_elt X, C))
20005 // Offset must be a constant multiple of the
20006 // known-minimum vector length of the result type.
20007 int64_t Offset = checkElem(Op0);
20008 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20009 return SDValue();
20011 unsigned NumElems = N->getNumOperands();
20012 SDValue In = Op0.getOperand(0).getOperand(0);
20013 EVT InSVT = In.getValueType().getScalarType();
20014 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20016 // Don't create an illegal input type after type legalization.
20017 if (LegalTypes && !TLI.isTypeLegal(InVT))
20018 return SDValue();
20020 // Ensure all the elements come from the same vector and are adjacent.
20021 for (unsigned i = 1; i != NumElems; ++i) {
20022 if ((Offset + i) != checkElem(N->getOperand(i)))
20023 return SDValue();
20026 SDLoc DL(N);
20027 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20028 Op0.getOperand(0).getOperand(1));
20029 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20030 VT, In);
20033 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20034 EVT VT = N->getValueType(0);
20036 // A vector built entirely of undefs is undef.
20037 if (ISD::allOperandsUndef(N))
20038 return DAG.getUNDEF(VT);
20040 // If this is a splat of a bitcast from another vector, change to a
20041 // concat_vector.
20042 // For example:
20043 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20044 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20046 // If X is a build_vector itself, the concat can become a larger build_vector.
20047 // TODO: Maybe this is useful for non-splat too?
20048 if (!LegalOperations) {
20049 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20050 Splat = peekThroughBitcasts(Splat);
20051 EVT SrcVT = Splat.getValueType();
20052 if (SrcVT.isVector()) {
20053 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20054 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20055 SrcVT.getVectorElementType(), NumElts);
20056 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20057 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20058 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20059 NewVT, Ops);
20060 return DAG.getBitcast(VT, Concat);
20066 // Check if we can express the BUILD_VECTOR via a subvector extract.
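// For example (editor's sketch; X and the types are only for illustration):
//   (v2i32 build_vector (extractelt X:v8i32, 4), (extractelt X:v8i32, 5))
//   --> (v2i32 extract_subvector X, 4)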
20067 if (!LegalTypes && (N->getNumOperands() > 1)) {
20068 SDValue Op0 = N->getOperand(0);
20069 auto checkElem = [&](SDValue Op) -> uint64_t {
20070 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20071 (Op0.getOperand(0) == Op.getOperand(0)))
20072 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20073 return CNode->getZExtValue();
20074 return -1;
20077 int Offset = checkElem(Op0);
20078 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20079 if (Offset + i != checkElem(N->getOperand(i))) {
20080 Offset = -1;
20081 break;
20085 if ((Offset == 0) &&
20086 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20087 return Op0.getOperand(0);
20088 if ((Offset != -1) &&
20089 ((Offset % N->getValueType(0).getVectorNumElements()) ==
20090 0)) // IDX must be a multiple of the output size.
20091 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20092 Op0.getOperand(0), Op0.getOperand(1));
20095 if (SDValue V = convertBuildVecZextToZext(N))
20096 return V;
20098 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20099 return V;
20101 if (SDValue V = reduceBuildVecTruncToBitCast(N))
20102 return V;
20104 if (SDValue V = reduceBuildVecToShuffle(N))
20105 return V;
20107 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20108 // Do this late as some of the above may replace the splat.
20109 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20110 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20111 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20112 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20115 return SDValue();
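// Fold a CONCAT_VECTORS whose operands are bitcasts of scalars (or undef) into
// one BUILD_VECTOR of the scalars, bitcast to the result type. Illustrative
// example (editor's sketch; it assumes the v2i32 operand type is not legal on
// the target, otherwise the fold below bails out early):
//   (v4i32 concat_vectors (v2i32 bitcast a:i64), (v2i32 bitcast b:i64))
//   --> (v4i32 bitcast (v2i64 build_vector a, b))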
20118 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20119 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20120 EVT OpVT = N->getOperand(0).getValueType();
20122 // If the operands are legal vectors, leave them alone.
20123 if (TLI.isTypeLegal(OpVT))
20124 return SDValue();
20126 SDLoc DL(N);
20127 EVT VT = N->getValueType(0);
20128 SmallVector<SDValue, 8> Ops;
20130 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20131 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20133 // Keep track of what we encounter.
20134 bool AnyInteger = false;
20135 bool AnyFP = false;
20136 for (const SDValue &Op : N->ops()) {
20137 if (ISD::BITCAST == Op.getOpcode() &&
20138 !Op.getOperand(0).getValueType().isVector())
20139 Ops.push_back(Op.getOperand(0));
20140 else if (ISD::UNDEF == Op.getOpcode())
20141 Ops.push_back(ScalarUndef);
20142 else
20143 return SDValue();
20145 // Note whether we encounter an integer or floating point scalar.
20146 // If it's neither, bail out, it could be something weird like x86mmx.
20147 EVT LastOpVT = Ops.back().getValueType();
20148 if (LastOpVT.isFloatingPoint())
20149 AnyFP = true;
20150 else if (LastOpVT.isInteger())
20151 AnyInteger = true;
20152 else
20153 return SDValue();
20156 // If any of the operands is a floating point scalar bitcast to a vector,
20157 // use floating point types throughout, and bitcast everything.
20158 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20159 if (AnyFP) {
20160 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20161 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20162 if (AnyInteger) {
20163 for (SDValue &Op : Ops) {
20164 if (Op.getValueType() == SVT)
20165 continue;
20166 if (Op.isUndef())
20167 Op = ScalarUndef;
20168 else
20169 Op = DAG.getBitcast(SVT, Op);
20174 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20175 VT.getSizeInBits() / SVT.getSizeInBits());
20176 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20179 // Attempt to merge nested concat_vectors/undefs.
20180 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20181 // --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20182 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20183 SelectionDAG &DAG) {
20184 EVT VT = N->getValueType(0);
20186 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20187 EVT SubVT;
20188 SDValue FirstConcat;
20189 for (const SDValue &Op : N->ops()) {
20190 if (Op.isUndef())
20191 continue;
20192 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20193 return SDValue();
20194 if (!FirstConcat) {
20195 SubVT = Op.getOperand(0).getValueType();
20196 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20197 return SDValue();
20198 FirstConcat = Op;
20199 continue;
20201 if (SubVT != Op.getOperand(0).getValueType())
20202 return SDValue();
20204 assert(FirstConcat && "Concat of all-undefs found");
20206 SmallVector<SDValue> ConcatOps;
20207 for (const SDValue &Op : N->ops()) {
20208 if (Op.isUndef()) {
20209 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20210 continue;
20212 ConcatOps.append(Op->op_begin(), Op->op_end());
20214 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20217 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20218 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20219 // most two distinct vectors the same size as the result, attempt to turn this
20220 // into a legal shuffle.
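// Illustrative example (editor's sketch; A, B and the types are only for
// illustration):
//   (v4i32 concat_vectors (v2i32 extract_subvector A:v4i32, 2),
//                         (v2i32 extract_subvector B:v4i32, 0))
//   --> (vector_shuffle<2,3,4,5> A, B), provided such a shuffle is legal.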
20221 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20222 EVT VT = N->getValueType(0);
20223 EVT OpVT = N->getOperand(0).getValueType();
20225 // We currently can't generate an appropriate shuffle for a scalable vector.
20226 if (VT.isScalableVector())
20227 return SDValue();
20229 int NumElts = VT.getVectorNumElements();
20230 int NumOpElts = OpVT.getVectorNumElements();
20232 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20233 SmallVector<int, 8> Mask;
20235 for (SDValue Op : N->ops()) {
20236 Op = peekThroughBitcasts(Op);
20238 // UNDEF nodes convert to UNDEF shuffle mask values.
20239 if (Op.isUndef()) {
20240 Mask.append((unsigned)NumOpElts, -1);
20241 continue;
20244 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20245 return SDValue();
20247 // What vector are we extracting the subvector from and at what index?
20248 SDValue ExtVec = Op.getOperand(0);
20249 int ExtIdx = Op.getConstantOperandVal(1);
20251 // We want the EVT of the original extraction to correctly scale the
20252 // extraction index.
20253 EVT ExtVT = ExtVec.getValueType();
20254 ExtVec = peekThroughBitcasts(ExtVec);
20256 // UNDEF nodes convert to UNDEF shuffle mask values.
20257 if (ExtVec.isUndef()) {
20258 Mask.append((unsigned)NumOpElts, -1);
20259 continue;
20262 // Ensure that we are extracting a subvector from a vector the same
20263 // size as the result.
20264 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20265 return SDValue();
20267 // Scale the subvector index to account for any bitcast.
20268 int NumExtElts = ExtVT.getVectorNumElements();
20269 if (0 == (NumExtElts % NumElts))
20270 ExtIdx /= (NumExtElts / NumElts);
20271 else if (0 == (NumElts % NumExtElts))
20272 ExtIdx *= (NumElts / NumExtElts);
20273 else
20274 return SDValue();
20276 // At most we can reference 2 inputs in the final shuffle.
20277 if (SV0.isUndef() || SV0 == ExtVec) {
20278 SV0 = ExtVec;
20279 for (int i = 0; i != NumOpElts; ++i)
20280 Mask.push_back(i + ExtIdx);
20281 } else if (SV1.isUndef() || SV1 == ExtVec) {
20282 SV1 = ExtVec;
20283 for (int i = 0; i != NumOpElts; ++i)
20284 Mask.push_back(i + ExtIdx + NumElts);
20285 } else {
20286 return SDValue();
20290 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20291 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20292 DAG.getBitcast(VT, SV1), Mask, DAG);
20295 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20296 unsigned CastOpcode = N->getOperand(0).getOpcode();
20297 switch (CastOpcode) {
20298 case ISD::SINT_TO_FP:
20299 case ISD::UINT_TO_FP:
20300 case ISD::FP_TO_SINT:
20301 case ISD::FP_TO_UINT:
20302 // TODO: Allow more opcodes?
20303 // case ISD::BITCAST:
20304 // case ISD::TRUNCATE:
20305 // case ISD::ZERO_EXTEND:
20306 // case ISD::SIGN_EXTEND:
20307 // case ISD::FP_EXTEND:
20308 break;
20309 default:
20310 return SDValue();
20313 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20314 if (!SrcVT.isVector())
20315 return SDValue();
20317 // All operands of the concat must be the same kind of cast from the same
20318 // source type.
20319 SmallVector<SDValue, 4> SrcOps;
20320 for (SDValue Op : N->ops()) {
20321 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20322 Op.getOperand(0).getValueType() != SrcVT)
20323 return SDValue();
20324 SrcOps.push_back(Op.getOperand(0));
20327 // The wider cast must be supported by the target. This is unusual because
20328 // the operation support type parameter depends on the opcode. In addition,
20329 // check the other type in the cast to make sure this is really legal.
20330 EVT VT = N->getValueType(0);
20331 EVT SrcEltVT = SrcVT.getVectorElementType();
20332 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20333 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20334 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20335 switch (CastOpcode) {
20336 case ISD::SINT_TO_FP:
20337 case ISD::UINT_TO_FP:
20338 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20339 !TLI.isTypeLegal(VT))
20340 return SDValue();
20341 break;
20342 case ISD::FP_TO_SINT:
20343 case ISD::FP_TO_UINT:
20344 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20345 !TLI.isTypeLegal(ConcatSrcVT))
20346 return SDValue();
20347 break;
20348 default:
20349 llvm_unreachable("Unexpected cast opcode");
20352 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20353 SDLoc DL(N);
20354 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20355 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20358 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20359 // If we only have one input vector, we don't need to do any concatenation.
20360 if (N->getNumOperands() == 1)
20361 return N->getOperand(0);
20363 // Check if all of the operands are undefs.
20364 EVT VT = N->getValueType(0);
20365 if (ISD::allOperandsUndef(N))
20366 return DAG.getUNDEF(VT);
20368 // Optimize concat_vectors where all but the first of the vectors are undef.
20369 if (all_of(drop_begin(N->ops()),
20370 [](const SDValue &Op) { return Op.isUndef(); })) {
20371 SDValue In = N->getOperand(0);
20372 assert(In.getValueType().isVector() && "Must concat vectors");
20374 // If the input is a concat_vectors, just make a larger concat by padding
20375 // with smaller undefs.
20376 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20377 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20378 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20379 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20380 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20383 SDValue Scalar = peekThroughOneUseBitcasts(In);
20385 // concat_vectors(scalar_to_vector(scalar), undef) ->
20386 // scalar_to_vector(scalar)
20387 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20388 Scalar.hasOneUse()) {
20389 EVT SVT = Scalar.getValueType().getVectorElementType();
20390 if (SVT == Scalar.getOperand(0).getValueType())
20391 Scalar = Scalar.getOperand(0);
20394 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20395 if (!Scalar.getValueType().isVector()) {
20396 // If the bitcast type isn't legal, it might be a trunc of a legal type;
20397 // look through the trunc so we can still do the transform:
20398 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20399 if (Scalar->getOpcode() == ISD::TRUNCATE &&
20400 !TLI.isTypeLegal(Scalar.getValueType()) &&
20401 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20402 Scalar = Scalar->getOperand(0);
20404 EVT SclTy = Scalar.getValueType();
20406 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20407 return SDValue();
20409 // Bail out if the vector size is not a multiple of the scalar size.
20410 if (VT.getSizeInBits() % SclTy.getSizeInBits())
20411 return SDValue();
20413 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20414 if (VNTNumElms < 2)
20415 return SDValue();
20417 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20418 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20419 return SDValue();
20421 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20422 return DAG.getBitcast(VT, Res);
20426 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20427 // We have already tested above for an UNDEF only concatenation.
20428 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20429 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20430 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20431 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20433 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20434 SmallVector<SDValue, 8> Opnds;
20435 EVT SVT = VT.getScalarType();
20437 EVT MinVT = SVT;
20438 if (!SVT.isFloatingPoint()) {
20439 // If the BUILD_VECTORs are built from integers, they may have different
20440 // operand types. Get the smallest type and truncate all operands to it.
20441 bool FoundMinVT = false;
20442 for (const SDValue &Op : N->ops())
20443 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20444 EVT OpSVT = Op.getOperand(0).getValueType();
20445 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20446 FoundMinVT = true;
20448 assert(FoundMinVT && "Concat vector type mismatch");
20451 for (const SDValue &Op : N->ops()) {
20452 EVT OpVT = Op.getValueType();
20453 unsigned NumElts = OpVT.getVectorNumElements();
20455 if (ISD::UNDEF == Op.getOpcode())
20456 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20458 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20459 if (SVT.isFloatingPoint()) {
20460 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20461 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20462 } else {
20463 for (unsigned i = 0; i != NumElts; ++i)
20464 Opnds.push_back(
20465 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20470 assert(VT.getVectorNumElements() == Opnds.size() &&
20471 "Concat vector type mismatch");
20472 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20475 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20476 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20477 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20478 return V;
20480 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20481 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20482 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20483 return V;
20485 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20486 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20487 return V;
20490 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20491 return V;
20493 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20494 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20495 // operands and look for CONCAT operations that place the incoming vectors
20496 // at the exact same location.
20498 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20499 SDValue SingleSource = SDValue();
20500 unsigned PartNumElem =
20501 N->getOperand(0).getValueType().getVectorMinNumElements();
20503 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20504 SDValue Op = N->getOperand(i);
20506 if (Op.isUndef())
20507 continue;
20509 // Check if this is the identity extract:
20510 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20511 return SDValue();
20513 // Find the single incoming vector for the extract_subvector.
20514 if (SingleSource.getNode()) {
20515 if (Op.getOperand(0) != SingleSource)
20516 return SDValue();
20517 } else {
20518 SingleSource = Op.getOperand(0);
20520 // Check the source type is the same as the type of the result.
20521 // If not, this concat may extend the vector, so we cannot
20522 // optimize it away.
20523 if (SingleSource.getValueType() != N->getValueType(0))
20524 return SDValue();
20527 // Check that we are reading from the identity index.
20528 unsigned IdentityIndex = i * PartNumElem;
20529 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20530 return SDValue();
20533 if (SingleSource.getNode())
20534 return SingleSource;
20536 return SDValue();
20539 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20540 // if the subvector can be sourced for free.
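// For example (editor's sketch), with SubVT = v4i32 and Index = 4:
//   (v8i32 insert_subvector ?, X:v4i32, 4)   --> X
//   (v8i32 concat_vectors A:v4i32, B:v4i32)  --> B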
20541 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20542 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20543 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20544 return V.getOperand(1);
20546 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20547 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20548 V.getOperand(0).getValueType() == SubVT &&
20549 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20550 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20551 return V.getOperand(SubIdx);
20553 return SDValue();
20556 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20557 SelectionDAG &DAG,
20558 bool LegalOperations) {
20559 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20560 SDValue BinOp = Extract->getOperand(0);
20561 unsigned BinOpcode = BinOp.getOpcode();
20562 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20563 return SDValue();
20565 EVT VecVT = BinOp.getValueType();
20566 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20567 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20568 return SDValue();
20570 SDValue Index = Extract->getOperand(1);
20571 EVT SubVT = Extract->getValueType(0);
20572 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20573 return SDValue();
20575 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20576 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20578 // TODO: We could handle the case where only 1 operand is being inserted by
20579 // creating an extract of the other operand, but that requires checking
20580 // number of uses and/or costs.
20581 if (!Sub0 || !Sub1)
20582 return SDValue();
20584 // We are inserting both operands of the wide binop only to extract back
20585 // to the narrow vector size. Eliminate all of the insert/extract:
20586 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20587 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20588 BinOp->getFlags());
20591 /// If we are extracting a subvector produced by a wide binary operator, try
20592 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20593 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20594 bool LegalOperations) {
20595 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20596 // some of these bailouts with other transforms.
20598 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20599 return V;
20601 // The extract index must be a constant, so we can map it to a concat operand.
20602 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20603 if (!ExtractIndexC)
20604 return SDValue();
20606 // We are looking for an optionally bitcasted wide vector binary operator
20607 // feeding an extract subvector.
20608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20609 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20610 unsigned BOpcode = BinOp.getOpcode();
20611 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20612 return SDValue();
20614 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20615 // reduced to the unary fneg when it is visited, and we probably want to deal
20616 // with fneg in a target-specific way.
20617 if (BOpcode == ISD::FSUB) {
20618 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20619 if (C && C->getValueAPF().isNegZero())
20620 return SDValue();
20623 // The binop must be a vector type, so we can extract some fraction of it.
20624 EVT WideBVT = BinOp.getValueType();
20625 // The optimisations below currently assume we are dealing with fixed length
20626 // vectors. It is possible to add support for scalable vectors, but at the
20627 // moment we've done no analysis to prove whether they are profitable or not.
20628 if (!WideBVT.isFixedLengthVector())
20629 return SDValue();
20631 EVT VT = Extract->getValueType(0);
20632 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20633 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20634 "Extract index is not a multiple of the vector length.");
20636 // Bail out if this is not a proper multiple width extraction.
20637 unsigned WideWidth = WideBVT.getSizeInBits();
20638 unsigned NarrowWidth = VT.getSizeInBits();
20639 if (WideWidth % NarrowWidth != 0)
20640 return SDValue();
20642 // Bail out if we are extracting a fraction of a single operation. This can
20643 // occur because we potentially looked through a bitcast of the binop.
20644 unsigned NarrowingRatio = WideWidth / NarrowWidth;
20645 unsigned WideNumElts = WideBVT.getVectorNumElements();
20646 if (WideNumElts % NarrowingRatio != 0)
20647 return SDValue();
20649 // Bail out if the target does not support a narrower version of the binop.
20650 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20651 WideNumElts / NarrowingRatio);
20652 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20653 return SDValue();
20655 // If extraction is cheap, we don't need to look at the binop operands
20656 // for concat ops. The narrow binop alone makes this transform profitable.
20657 // We can't just reuse the original extract index operand because we may have
20658 // bitcasted.
20659 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20660 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20661 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20662 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20663 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20664 SDLoc DL(Extract);
20665 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20666 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20667 BinOp.getOperand(0), NewExtIndex);
20668 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20669 BinOp.getOperand(1), NewExtIndex);
20670 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20671 BinOp.getNode()->getFlags());
20672 return DAG.getBitcast(VT, NarrowBinOp);
20675 // Only handle the case where we are doubling and then halving. A larger ratio
20676 // may require more than two narrow binops to replace the wide binop.
20677 if (NarrowingRatio != 2)
20678 return SDValue();
20680 // TODO: The motivating case for this transform is an x86 AVX1 target. That
20681 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20682 // flavors, but no other 256-bit integer support. This could be extended to
20683 // handle any binop, but that may require fixing/adding other folds to avoid
20684 // codegen regressions.
20685 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20686 return SDValue();
20688 // We need at least one concatenation operation of a binop operand to make
20689 // this transform worthwhile. The concat must double the input vector sizes.
20690 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20691 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20692 return V.getOperand(ConcatOpNum);
20693 return SDValue();
20695 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20696 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20698 if (SubVecL || SubVecR) {
20699 // If a binop operand was not the result of a concat, we must extract a
20700 // half-sized operand for our new narrow binop:
20701 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20702 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20703 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20704 SDLoc DL(Extract);
20705 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20706 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20707 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20708 BinOp.getOperand(0), IndexC);
20710 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20711 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20712 BinOp.getOperand(1), IndexC);
20714 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20715 return DAG.getBitcast(VT, NarrowBinOp);
20718 return SDValue();
20721 /// If we are extracting a subvector from a wide vector load, convert to a
20722 /// narrow load to eliminate the extraction:
20723 /// (extract_subvector (load wide vector)) --> (load narrow vector)
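/// For example (editor's sketch; little-endian, byte-sized types):
///   (v4i32 extract_subvector (v8i32 load %p), 4) --> (v4i32 load %p + 16 bytes)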
20724 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20725 // TODO: Add support for big-endian. The offset calculation must be adjusted.
20726 if (DAG.getDataLayout().isBigEndian())
20727 return SDValue();
20729 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20730 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
20731 return SDValue();
20733 // Allow targets to opt-out.
20734 EVT VT = Extract->getValueType(0);
20736 // We can only create byte sized loads.
20737 if (!VT.isByteSized())
20738 return SDValue();
20740 unsigned Index = Extract->getConstantOperandVal(1);
20741 unsigned NumElts = VT.getVectorMinNumElements();
20743 // The definition of EXTRACT_SUBVECTOR states that the index must be a
20744 // multiple of the minimum number of elements in the result type.
20745 assert(Index % NumElts == 0 && "The extract subvector index is not a "
20746 "multiple of the result's element count");
20748 // It's fine to use TypeSize here as we know the offset will not be negative.
20749 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20751 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20752 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20753 return SDValue();
20755 // The narrow load will be offset from the base address of the old load if
20756 // we are extracting from something besides index 0 (little-endian).
20757 SDLoc DL(Extract);
20759 // TODO: Use "BaseIndexOffset" to make this more effective.
20760 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20762 uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20763 MachineFunction &MF = DAG.getMachineFunction();
20764 MachineMemOperand *MMO;
20765 if (Offset.isScalable()) {
20766 MachinePointerInfo MPI =
20767 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20768 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20769 } else
20770 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20771 StoreSize);
20773 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20774 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20775 return NewLd;
20778 /// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
20779 /// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
20780 /// EXTRACT_SUBVECTOR(Op?, ?),
20781 /// Mask'))
20782 /// iff it is legal and profitable to do so. Notably, the trimmed mask
20783 /// (containing only the elements that are extracted)
20784 /// must reference at most two subvectors.
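/// For example (illustrative):
///   (v4i32 (extract_subvector
///              (v8i32 (vector_shuffle A, B, <0,1,8,9,u,u,u,u>)), 0))
///     --> (v4i32 (vector_shuffle (extract_subvector A, 0),
///                                (extract_subvector B, 0), <0,1,4,5>))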
20785 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
20786 SelectionDAG &DAG,
20787 const TargetLowering &TLI,
20788 bool LegalOperations) {
20789 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20790 "Must only be called on EXTRACT_SUBVECTOR's");
20792 SDValue N0 = N->getOperand(0);
20794 // Only deal with non-scalable vectors.
20795 EVT NarrowVT = N->getValueType(0);
20796 EVT WideVT = N0.getValueType();
20797 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
20798 return SDValue();
20800 // The operand must be a shufflevector.
20801 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
20802 if (!WideShuffleVector)
20803 return SDValue();
20805 // The old shuffle needs to go away.
20806 if (!WideShuffleVector->hasOneUse())
20807 return SDValue();
20809 // And the narrow shufflevector that we'll form must be legal.
20810 if (LegalOperations &&
20811 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
20812 return SDValue();
20814 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
20815 int NumEltsExtracted = NarrowVT.getVectorNumElements();
20816 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
20817 "Extract index is not a multiple of the output vector length.");
20819 int WideNumElts = WideVT.getVectorNumElements();
20821 SmallVector<int, 16> NewMask;
20822 NewMask.reserve(NumEltsExtracted);
20823 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
20824 DemandedSubvectors;
20826 // Try to decode the wide mask into narrow mask from at most two subvectors.
20827 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
20828 NumEltsExtracted)) {
20829 assert((M >= -1) && (M < (2 * WideNumElts)) &&
20830 "Out-of-bounds shuffle mask?");
20832 if (M < 0) {
20833 // Does not depend on operands, does not require adjustment.
20834 NewMask.emplace_back(M);
20835 continue;
20838 // From which operand of the shuffle does this shuffle mask element pick?
20839 int WideShufOpIdx = M / WideNumElts;
20840 // Which element of that operand is picked?
20841 int OpEltIdx = M % WideNumElts;
20843 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
20844 "Shuffle mask vector decomposition failure.");
20846 // And which NumEltsExtracted-sized subvector of that operand is that?
20847 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
20848 // And which element within that subvector of that operand is that?
20849 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
20851 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
20852 "Shuffle mask subvector decomposition failure.");
20854 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
20855 WideShufOpIdx * WideNumElts) == M &&
20856 "Shuffle mask full decomposition failure.");
20858 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
20860 if (Op.isUndef()) {
20861 // Picking from an undef operand. Let's adjust mask instead.
20862 NewMask.emplace_back(-1);
20863 continue;
20866 // Profitability check: only deal with extractions from the first subvector.
20867 if (OpSubvecIdx != 0)
20868 return SDValue();
20870 const std::pair<SDValue, int> DemandedSubvector =
20871 std::make_pair(Op, OpSubvecIdx);
20873 if (DemandedSubvectors.insert(DemandedSubvector)) {
20874 if (DemandedSubvectors.size() > 2)
20875 return SDValue(); // We can't handle more than two subvectors.
20876 // How many elements into the WideVT does this subvector start?
20877 int Index = NumEltsExtracted * OpSubvecIdx;
20878 // Bail out if the extraction isn't going to be cheap.
20879 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
20880 return SDValue();
20883 // Ok, but from which operand of the new shuffle will this element pick?
20884 int NewOpIdx =
20885 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
20886 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
20888 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
20889 NewMask.emplace_back(AdjM);
20891 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
20892 assert(DemandedSubvectors.size() <= 2 &&
20893 "Should have ended up demanding at most two subvectors.");
20895 // Did we discover that the shuffle does not actually depend on operands?
20896 if (DemandedSubvectors.empty())
20897 return DAG.getUNDEF(NarrowVT);
20899 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
20900 // operand[s]/index[es], so there is no point in checking for its legality.
20902 // Do not turn a legal shuffle into an illegal one.
20903 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
20904 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
20905 return SDValue();
20907 SDLoc DL(N);
20909 SmallVector<SDValue, 2> NewOps;
20910 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
20911 &DemandedSubvector : DemandedSubvectors) {
20912 // How many elements into the WideVT does this subvector start?
20913 int Index = NumEltsExtracted * DemandedSubvector.second;
20914 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
20915 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
20916 DemandedSubvector.first, IndexC));
20918 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
20919 "Should end up with either one or two ops");
20921 // If we ended up with only one operand, pad with an undef.
20922 if (NewOps.size() == 1)
20923 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
20925 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
20928 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20929 EVT NVT = N->getValueType(0);
20930 SDValue V = N->getOperand(0);
20931 uint64_t ExtIdx = N->getConstantOperandVal(1);
20933 // Extract from UNDEF is UNDEF.
20934 if (V.isUndef())
20935 return DAG.getUNDEF(NVT);
20937 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20938 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20939 return NarrowLoad;
20941 // Combine an extract of an extract into a single extract_subvector.
20942 // ext (ext X, C), 0 --> ext X, C
20943 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20944 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20945 V.getConstantOperandVal(1)) &&
20946 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20947 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20948 V.getOperand(1));
20952 // Try to move vector bitcast after extract_subv by scaling extraction index:
20953 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20954 if (V.getOpcode() == ISD::BITCAST &&
20955 V.getOperand(0).getValueType().isVector() &&
20956 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20957 SDValue SrcOp = V.getOperand(0);
20958 EVT SrcVT = SrcOp.getValueType();
20959 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20960 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20961 if ((SrcNumElts % DestNumElts) == 0) {
20962 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20963 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20964 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20965 NewExtEC);
20966 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20967 SDLoc DL(N);
20968 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20969 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20970 V.getOperand(0), NewIndex);
20971 return DAG.getBitcast(NVT, NewExtract);
20974 if ((DestNumElts % SrcNumElts) == 0) {
20975 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20976 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20977 ElementCount NewExtEC =
20978 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20979 EVT ScalarVT = SrcVT.getScalarType();
20980 if ((ExtIdx % DestSrcRatio) == 0) {
20981 SDLoc DL(N);
20982 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20983 EVT NewExtVT =
20984 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20985 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20986 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20987 SDValue NewExtract =
20988 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20989 V.getOperand(0), NewIndex);
20990 return DAG.getBitcast(NVT, NewExtract);
20992 if (NewExtEC.isScalar() &&
20993 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20994 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20995 SDValue NewExtract =
20996 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20997 V.getOperand(0), NewIndex);
20998 return DAG.getBitcast(NVT, NewExtract);
21005 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21006 unsigned ExtNumElts = NVT.getVectorMinNumElements();
21007 EVT ConcatSrcVT = V.getOperand(0).getValueType();
21008 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21009 "Concat and extract subvector do not change element type");
21010 assert((ExtIdx % ExtNumElts) == 0 &&
21011 "Extract index is not a multiple of the input vector length.");
21013 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21014 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21016 // If the concatenated source types match this extract, it's a direct
21017 // simplification:
21018 // extract_subvec (concat V1, V2, ...), i --> Vi
21019 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21020 return V.getOperand(ConcatOpIdx);
21022 // If the concatenated source vectors are a multiple length of this extract,
21023 // then extract a fraction of one of those source vectors directly from a
21024 // concat operand. Example:
21025 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
21026 // v2i8 extract_subvec v8i8 Y, 6
21027 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21028 ConcatSrcNumElts % ExtNumElts == 0) {
21029 SDLoc DL(N);
21030 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21031 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21032 "Trying to extract from >1 concat operand?");
21033 assert(NewExtIdx % ExtNumElts == 0 &&
21034 "Extract index is not a multiple of the input vector length.");
21035 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21036 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21037 V.getOperand(ConcatOpIdx), NewIndexC);
21041 if (SDValue V =
21042 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21043 return V;
21045 V = peekThroughBitcasts(V);
21047 // If the input is a build vector, try to make a smaller build vector.
21048 if (V.getOpcode() == ISD::BUILD_VECTOR) {
21049 EVT InVT = V.getValueType();
21050 unsigned ExtractSize = NVT.getSizeInBits();
21051 unsigned EltSize = InVT.getScalarSizeInBits();
21052 // Only do this if we won't split any elements.
21053 if (ExtractSize % EltSize == 0) {
21054 unsigned NumElems = ExtractSize / EltSize;
21055 EVT EltVT = InVT.getVectorElementType();
21056 EVT ExtractVT =
21057 NumElems == 1 ? EltVT
21058 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21059 if ((Level < AfterLegalizeDAG ||
21060 (NumElems == 1 ||
21061 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21062 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21063 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21065 if (NumElems == 1) {
21066 SDValue Src = V->getOperand(IdxVal);
21067 if (EltVT != Src.getValueType())
21068 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
21069 return DAG.getBitcast(NVT, Src);
21072 // Extract the pieces from the original build_vector.
21073 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21074 V->ops().slice(IdxVal, NumElems));
21075 return DAG.getBitcast(NVT, BuildVec);
21080 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21081 // Handle only the simple case where the vector being inserted and the
21082 // vector being extracted are the same size.
21083 EVT SmallVT = V.getOperand(1).getValueType();
21084 if (!NVT.bitsEq(SmallVT))
21085 return SDValue();
21087 // Combine:
21088 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21089 // Into:
21090 // indices are equal or bit offsets are equal => V2
21091 // otherwise => (extract_subvec V1, ExtIdx)
21092 uint64_t InsIdx = V.getConstantOperandVal(2);
21093 if (InsIdx * SmallVT.getScalarSizeInBits() ==
21094 ExtIdx * NVT.getScalarSizeInBits()) {
21095 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21096 return SDValue();
21098 return DAG.getBitcast(NVT, V.getOperand(1));
21100 return DAG.getNode(
21101 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21102 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21103 N->getOperand(1));
21106 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21107 return NarrowBOp;
21109 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21110 return SDValue(N, 0);
21112 return SDValue();
21115 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21116 /// followed by concatenation. Narrow vector ops may have better performance
21117 /// than wide ops, and this can unlock further narrowing of other vector ops.
21118 /// Targets can invert this transform later if it is not profitable.
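/// For example, with v4i32 X and Y (illustrative):
///   shuffle (v8i32 concat X, undef), (concat Y, undef), <0,8,1,9,u,u,u,u>
///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <u,u,u,u>)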
21119 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21120 SelectionDAG &DAG) {
21121 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21122 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21123 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21124 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21125 return SDValue();
21127 // Split the wide shuffle mask into halves. Any mask element that is accessing
21128 // operand 1 is offset down to account for narrowing of the vectors.
21129 ArrayRef<int> Mask = Shuf->getMask();
21130 EVT VT = Shuf->getValueType(0);
21131 unsigned NumElts = VT.getVectorNumElements();
21132 unsigned HalfNumElts = NumElts / 2;
21133 SmallVector<int, 16> Mask0(HalfNumElts, -1);
21134 SmallVector<int, 16> Mask1(HalfNumElts, -1);
21135 for (unsigned i = 0; i != NumElts; ++i) {
21136 if (Mask[i] == -1)
21137 continue;
21138 // If we reference the upper (undef) subvector then the element is undef.
21139 if ((Mask[i] % NumElts) >= HalfNumElts)
21140 continue;
21141 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21142 if (i < HalfNumElts)
21143 Mask0[i] = M;
21144 else
21145 Mask1[i - HalfNumElts] = M;
21148 // Ask the target if this is a valid transform.
21149 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21150 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21151 HalfNumElts);
21152 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21153 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21154 return SDValue();
21156 // shuffle (concat X, undef), (concat Y, undef), Mask -->
21157 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21158 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21159 SDLoc DL(Shuf);
21160 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21161 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21162 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21165 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
21166 // or turn a shuffle of a single concat into a simpler shuffle, then concat.
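// For example, with v4i32 A, B, C, D (illustrative):
//   shuffle (v8i32 concat A, B), (v8i32 concat C, D), <4,5,6,7,8,9,10,11>
//     --> concat B, C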
21167 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21168 EVT VT = N->getValueType(0);
21169 unsigned NumElts = VT.getVectorNumElements();
21171 SDValue N0 = N->getOperand(0);
21172 SDValue N1 = N->getOperand(1);
21173 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21174 ArrayRef<int> Mask = SVN->getMask();
21176 SmallVector<SDValue, 4> Ops;
21177 EVT ConcatVT = N0.getOperand(0).getValueType();
21178 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21179 unsigned NumConcats = NumElts / NumElemsPerConcat;
21181 auto IsUndefMaskElt = [](int i) { return i == -1; };
21183 // Special case: shuffle(concat(A,B)) can be more efficiently represented
21184 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21185 // half vector elements.
21186 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21187 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21188 IsUndefMaskElt)) {
21189 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21190 N0.getOperand(1),
21191 Mask.slice(0, NumElemsPerConcat));
21192 N1 = DAG.getUNDEF(ConcatVT);
21193 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21196 // Look at every vector that's inserted. We're looking for exact
21197 // subvector-sized copies from a concatenated vector
21198 for (unsigned I = 0; I != NumConcats; ++I) {
21199 unsigned Begin = I * NumElemsPerConcat;
21200 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21202 // Make sure we're dealing with a copy.
21203 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21204 Ops.push_back(DAG.getUNDEF(ConcatVT));
21205 continue;
21208 int OpIdx = -1;
21209 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21210 if (IsUndefMaskElt(SubMask[i]))
21211 continue;
21212 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21213 return SDValue();
21214 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21215 if (0 <= OpIdx && EltOpIdx != OpIdx)
21216 return SDValue();
21217 OpIdx = EltOpIdx;
21219 assert(0 <= OpIdx && "Unknown concat_vectors op");
21221 if (OpIdx < (int)N0.getNumOperands())
21222 Ops.push_back(N0.getOperand(OpIdx));
21223 else
21224 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21227 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21230 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21231 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21233 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21234 // a simplification in some sense, but it isn't appropriate in general: some
21235 // BUILD_VECTORs are substantially cheaper than others. The general case
21236 // of a BUILD_VECTOR requires inserting each element individually (or
21237 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21238 // all constants is a single constant pool load. A BUILD_VECTOR where each
21239 // element is identical is a splat. A BUILD_VECTOR where most of the operands
21240 // are undef lowers to a small number of element insertions.
21242 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21243 // We don't fold shuffles where one side is a non-zero constant, and we don't
21244 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21245 // non-constant operands. This seems to work out reasonably well in practice.
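//
// For example (illustrative, subject to the heuristics above):
//   shuffle (build_vector a, b, c, d), (build_vector e, f, g, h), <0,4,1,5>
//     --> build_vector a, e, b, f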
21246 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
21247 SelectionDAG &DAG,
21248 const TargetLowering &TLI) {
21249 EVT VT = SVN->getValueType(0);
21250 unsigned NumElts = VT.getVectorNumElements();
21251 SDValue N0 = SVN->getOperand(0);
21252 SDValue N1 = SVN->getOperand(1);
21254 if (!N0->hasOneUse())
21255 return SDValue();
21257 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
21258 // discussed above.
21259 if (!N1.isUndef()) {
21260 if (!N1->hasOneUse())
21261 return SDValue();
21263 bool N0AnyConst = isAnyConstantBuildVector(N0);
21264 bool N1AnyConst = isAnyConstantBuildVector(N1);
21265 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21266 return SDValue();
21267 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21268 return SDValue();
21271 // If both inputs are splats of the same value then we can safely merge this
21272 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21273 bool IsSplat = false;
21274 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21275 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21276 if (BV0 && BV1)
21277 if (SDValue Splat0 = BV0->getSplatValue())
21278 IsSplat = (Splat0 == BV1->getSplatValue());
21280 SmallVector<SDValue, 8> Ops;
21281 SmallSet<SDValue, 16> DuplicateOps;
21282 for (int M : SVN->getMask()) {
21283 SDValue Op = DAG.getUNDEF(VT.getScalarType());
21284 if (M >= 0) {
21285 int Idx = M < (int)NumElts ? M : M - NumElts;
21286 SDValue &S = (M < (int)NumElts ? N0 : N1);
21287 if (S.getOpcode() == ISD::BUILD_VECTOR) {
21288 Op = S.getOperand(Idx);
21289 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21290 SDValue Op0 = S.getOperand(0);
21291 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21292 } else {
21293 // Operand can't be combined - bail out.
21294 return SDValue();
21298 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21299 // generating a splat; semantically, this is fine, but it's likely to
21300 // generate low-quality code if the target can't reconstruct an appropriate
21301 // shuffle.
21302 if (!Op.isUndef() && !isIntOrFPConstant(Op))
21303 if (!IsSplat && !DuplicateOps.insert(Op).second)
21304 return SDValue();
21306 Ops.push_back(Op);
21309 // BUILD_VECTOR requires all inputs to be of the same type, so find the
21310 // maximum type and extend them all.
21311 EVT SVT = VT.getScalarType();
21312 if (SVT.isInteger())
21313 for (SDValue &Op : Ops)
21314 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21315 if (SVT != VT.getScalarType())
21316 for (SDValue &Op : Ops)
21317 Op = TLI.isZExtFree(Op.getValueType(), SVT)
21318 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21319 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
21320 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21323 // Match shuffles that can be converted to any_vector_extend_in_reg.
21324 // This is often generated during legalization.
21325 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21326 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21327 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
21328 SelectionDAG &DAG,
21329 const TargetLowering &TLI,
21330 bool LegalOperations) {
21331 EVT VT = SVN->getValueType(0);
21332 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21334 // TODO Add support for big-endian when we have a test case.
21335 if (!VT.isInteger() || IsBigEndian)
21336 return SDValue();
21338 unsigned NumElts = VT.getVectorNumElements();
21339 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21340 ArrayRef<int> Mask = SVN->getMask();
21341 SDValue N0 = SVN->getOperand(0);
21343 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21344 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21345 for (unsigned i = 0; i != NumElts; ++i) {
21346 if (Mask[i] < 0)
21347 continue;
21348 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21349 continue;
21350 return false;
21352 return true;
21355 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
21356 // power-of-2 extensions as they are the most likely.
21357 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
21358 // Check for non power of 2 vector sizes
21359 if (NumElts % Scale != 0)
21360 continue;
21361 if (!isAnyExtend(Scale))
21362 continue;
21364 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21365 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21366 // Never create an illegal type. Only create unsupported operations if we
21367 // are pre-legalization.
21368 if (TLI.isTypeLegal(OutVT))
21369 if (!LegalOperations ||
21370 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
21371 return DAG.getBitcast(VT,
21372 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
21373 SDLoc(SVN), OutVT, N0));
21376 return SDValue();
21379 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21380 // each source element of a large type into the lowest elements of a smaller
21381 // destination type. This is often generated during legalization.
21382 // If the source node itself was a '*_extend_vector_inreg' node then we should
21383 // be able to remove it.
21384 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
21385 SelectionDAG &DAG) {
21386 EVT VT = SVN->getValueType(0);
21387 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21389 // TODO Add support for big-endian when we have a test case.
21390 if (!VT.isInteger() || IsBigEndian)
21391 return SDValue();
21393 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21395 unsigned Opcode = N0.getOpcode();
21396 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21397 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21398 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
21399 return SDValue();
21401 SDValue N00 = N0.getOperand(0);
21402 ArrayRef<int> Mask = SVN->getMask();
21403 unsigned NumElts = VT.getVectorNumElements();
21404 unsigned EltSizeInBits = VT.getScalarSizeInBits();
21405 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21406 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21408 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21409 return SDValue();
21410 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21412 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21413 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21414 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21415 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21416 for (unsigned i = 0; i != NumElts; ++i) {
21417 if (Mask[i] < 0)
21418 continue;
21419 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21420 continue;
21421 return false;
21423 return true;
21426 // At the moment we just handle the case where we've truncated back to the
21427 // same size as before the extension.
21428 // TODO: handle more extension/truncation cases as cases arise.
21429 if (EltSizeInBits != ExtSrcSizeInBits)
21430 return SDValue();
21432 // We can remove *extend_vector_inreg only if the truncation happens at
21433 // the same scale as the extension.
21434 if (isTruncate(ExtScale))
21435 return DAG.getBitcast(VT, N00);
21437 return SDValue();
21440 // Combine shuffles of splat-shuffles of the form:
21441 // shuffle (shuffle V, undef, splat-mask), undef, M
21442 // If splat-mask contains undef elements, we need to be careful about
21443 // introducing undef's in the folded mask which are not the result of composing
21444 // the masks of the shuffles.
21445 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21446 SelectionDAG &DAG) {
21447 if (!Shuf->getOperand(1).isUndef())
21448 return SDValue();
21449 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21450 if (!Splat || !Splat->isSplat())
21451 return SDValue();
21453 ArrayRef<int> ShufMask = Shuf->getMask();
21454 ArrayRef<int> SplatMask = Splat->getMask();
21455 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21457 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21458 // every undef mask element in the splat-shuffle has a corresponding undef
21459 // element in the user-shuffle's mask or if the composition of mask elements
21460 // would result in undef.
21461 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21462 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21463 // In this case it is not legal to simplify to the splat-shuffle because we
21464 // may expose an undef element at index 1 to the users of the shuffle,
21465 // which was not there before the combine.
21466 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21467 // In this case the composition of masks yields SplatMask, so it's ok to
21468 // simplify to the splat-shuffle.
21469 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21470 // In this case the composed mask includes all undef elements of SplatMask
21471 // and in addition sets element zero to undef. It is safe to simplify to
21472 // the splat-shuffle.
21473 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21474 ArrayRef<int> SplatMask) {
21475 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21476 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21477 SplatMask[UserMask[i]] != -1)
21478 return false;
21479 return true;
21481 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21482 return Shuf->getOperand(0);
21484 // Create a new shuffle with a mask that is composed of the two shuffles'
21485 // masks.
21486 SmallVector<int, 32> NewMask;
21487 for (int Idx : ShufMask)
21488 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21490 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21491 Splat->getOperand(0), Splat->getOperand(1),
21492 NewMask);
21495 /// Combine shuffle of shuffle of the form:
21496 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
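/// For example (illustrative):
///   shuf (shuf X, undef, <2,u,1,2>), undef, <0,u,3,3>
///     --> shuf X, undef, <2,u,2,2>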
21497 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21498 SelectionDAG &DAG) {
21499 if (!OuterShuf->getOperand(1).isUndef())
21500 return SDValue();
21501 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21502 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21503 return SDValue();
21505 ArrayRef<int> OuterMask = OuterShuf->getMask();
21506 ArrayRef<int> InnerMask = InnerShuf->getMask();
21507 unsigned NumElts = OuterMask.size();
21508 assert(NumElts == InnerMask.size() && "Mask length mismatch");
21509 SmallVector<int, 32> CombinedMask(NumElts, -1);
21510 int SplatIndex = -1;
21511 for (unsigned i = 0; i != NumElts; ++i) {
21512 // Undef lanes remain undef.
21513 int OuterMaskElt = OuterMask[i];
21514 if (OuterMaskElt == -1)
21515 continue;
21517 // Peek through the shuffle masks to get the underlying source element.
21518 int InnerMaskElt = InnerMask[OuterMaskElt];
21519 if (InnerMaskElt == -1)
21520 continue;
21522 // Initialize the splatted element.
21523 if (SplatIndex == -1)
21524 SplatIndex = InnerMaskElt;
21526 // Non-matching index - this is not a splat.
21527 if (SplatIndex != InnerMaskElt)
21528 return SDValue();
21530 CombinedMask[i] = InnerMaskElt;
21532 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21533 getSplatIndex(CombinedMask) != -1) &&
21534 "Expected a splat mask");
21536 // TODO: The transform may be a win even if the mask is not legal.
21537 EVT VT = OuterShuf->getValueType(0);
21538 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21539 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21540 return SDValue();
21542 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21543 InnerShuf->getOperand(1), CombinedMask);
21546 /// If the shuffle mask is taking exactly one element from the first vector
21547 /// operand and passing through all other elements from the second vector
21548 /// operand, return the index of the mask element that is choosing an element
21549 /// from the first operand. Otherwise, return -1.
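/// For example, Mask = <4,5,1,7> takes element 1 from operand 0 into lane 2
/// and keeps lanes 0, 1, and 3 from operand 1, so this returns 2.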
21550 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21551 int MaskSize = Mask.size();
21552 int EltFromOp0 = -1;
21553 // TODO: This does not match if there are undef elements in the shuffle mask.
21554 // Should we ignore undefs in the shuffle mask instead? The trade-off is
21555 // removing an instruction (a shuffle), but losing the knowledge that some
21556 // vector lanes are not needed.
21557 for (int i = 0; i != MaskSize; ++i) {
21558 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21559 // We're looking for a shuffle of exactly one element from operand 0.
21560 if (EltFromOp0 != -1)
21561 return -1;
21562 EltFromOp0 = i;
21563 } else if (Mask[i] != i + MaskSize) {
21564 // Nothing from operand 1 can change lanes.
21565 return -1;
21568 return EltFromOp0;
21571 /// If a shuffle inserts exactly one element from a source vector operand into
21572 /// another vector operand and we can access the specified element as a scalar,
21573 /// then we can eliminate the shuffle.
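/// For example (illustrative):
///   shuffle (insert_vector_elt V1, x, 0), V2, <4,5,0,7>
///     --> insert_vector_elt V2, x, 2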
21574 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21575 SelectionDAG &DAG) {
21576 // First, check if we are taking one element of a vector and shuffling that
21577 // element into another vector.
21578 ArrayRef<int> Mask = Shuf->getMask();
21579 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21580 SDValue Op0 = Shuf->getOperand(0);
21581 SDValue Op1 = Shuf->getOperand(1);
21582 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21583 if (ShufOp0Index == -1) {
21584 // Commute mask and check again.
21585 ShuffleVectorSDNode::commuteMask(CommutedMask);
21586 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21587 if (ShufOp0Index == -1)
21588 return SDValue();
21589 // Commute operands to match the commuted shuffle mask.
21590 std::swap(Op0, Op1);
21591 Mask = CommutedMask;
21594 // The shuffle inserts exactly one element from operand 0 into operand 1.
21595 // Now see if we can access that element as a scalar via a real insert element
21596 // instruction.
21597 // TODO: We can try harder to locate the element as a scalar. Examples: it
21598 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21599 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21600 "Shuffle mask value must be from operand 0");
21601 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21602 return SDValue();
21604 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21605 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21606 return SDValue();
21608 // There's an existing insertelement with constant insertion index, so we
21609 // don't need to check the legality/profitability of a replacement operation
21610 // that differs at most in the constant value. The target should be able to
21611 // lower any of those in a similar way. If not, legalization will expand this
21612 // to a scalar-to-vector plus shuffle.
21614 // Note that the shuffle may move the scalar from the position that the insert
21615 // element used. Therefore, our new insert element occurs at the shuffle's
21616 // mask index value, not the insert's index value.
21617 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21618 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21619 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21620 Op1, Op0.getOperand(1), NewInsIndex);
21623 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21624 /// completely. This has the potential to lose undef knowledge because the first
21625 /// shuffle may not have an undef mask element where the second one does. So
21626 /// only call this after doing simplifications based on demanded elements.
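/// For example (illustrative):
///   shuf (shuf0 X, Y, <1,1,3,3>), undef, <1,0,3,2> --> shuf0 X, Y, <1,1,3,3>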
21627 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21628 // shuf (shuf0 X, Y, Mask0), undef, Mask
21629 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21630 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21631 return SDValue();
21633 ArrayRef<int> Mask = Shuf->getMask();
21634 ArrayRef<int> Mask0 = Shuf0->getMask();
21635 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21636 // Ignore undef elements.
21637 if (Mask[i] == -1)
21638 continue;
21639 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21641 // Is the element of the shuffle operand chosen by this shuffle the same as
21642 // the element chosen by the shuffle operand itself?
21643 if (Mask0[Mask[i]] != Mask0[i])
21644 return SDValue();
21646 // Every element of this shuffle is identical to the result of the previous
21647 // shuffle, so we can replace this value.
21648 return Shuf->getOperand(0);
21651 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21652 EVT VT = N->getValueType(0);
21653 unsigned NumElts = VT.getVectorNumElements();
21655 SDValue N0 = N->getOperand(0);
21656 SDValue N1 = N->getOperand(1);
21658 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21660 // Canonicalize shuffle undef, undef -> undef
21661 if (N0.isUndef() && N1.isUndef())
21662 return DAG.getUNDEF(VT);
21664 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21666 // Canonicalize shuffle v, v -> v, undef
21667 if (N0 == N1)
21668 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
21669 createUnaryMask(SVN->getMask(), NumElts));
21671 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
21672 if (N0.isUndef())
21673 return DAG.getCommutedVectorShuffle(*SVN);
21675 // Remove references to rhs if it is undef
21676 if (N1.isUndef()) {
21677 bool Changed = false;
21678 SmallVector<int, 8> NewMask;
21679 for (unsigned i = 0; i != NumElts; ++i) {
21680 int Idx = SVN->getMaskElt(i);
21681 if (Idx >= (int)NumElts) {
21682 Idx = -1;
21683 Changed = true;
21685 NewMask.push_back(Idx);
21687 if (Changed)
21688 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21691 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21692 return InsElt;
21694 // A shuffle of a single vector that is a splatted value can always be folded.
21695 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21696 return V;
21698 if (SDValue V = formSplatFromShuffles(SVN, DAG))
21699 return V;
21701 // If it is a splat, check if the argument vector is another splat or a
21702 // build_vector.
21703 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21704 int SplatIndex = SVN->getSplatIndex();
21705 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21706 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21707 // splat (vector_bo L, R), Index -->
21708 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21709 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21710 SDLoc DL(N);
21711 EVT EltVT = VT.getScalarType();
21712 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21713 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21714 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21715 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21716 N0.getNode()->getFlags());
21717 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21718 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21719 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21722 // If this is a bit convert that changes the element type of the vector but
21723 // not the number of vector elements, look through it. Be careful not to
21724 // look through conversions that change things like v4f32 to v2f64.
21725 SDNode *V = N0.getNode();
21726 if (V->getOpcode() == ISD::BITCAST) {
21727 SDValue ConvInput = V->getOperand(0);
21728 if (ConvInput.getValueType().isVector() &&
21729 ConvInput.getValueType().getVectorNumElements() == NumElts)
21730 V = ConvInput.getNode();
21733 if (V->getOpcode() == ISD::BUILD_VECTOR) {
21734 assert(V->getNumOperands() == NumElts &&
21735 "BUILD_VECTOR has wrong number of operands");
21736 SDValue Base;
21737 bool AllSame = true;
21738 for (unsigned i = 0; i != NumElts; ++i) {
21739 if (!V->getOperand(i).isUndef()) {
21740 Base = V->getOperand(i);
21741 break;
21744 // Splat of <u, u, u, u>, return <u, u, u, u>
21745 if (!Base.getNode())
21746 return N0;
21747 for (unsigned i = 0; i != NumElts; ++i) {
21748 if (V->getOperand(i) != Base) {
21749 AllSame = false;
21750 break;
21753 // Splat of <x, x, x, x>, return <x, x, x, x>
21754 if (AllSame)
21755 return N0;
21757 // Canonicalize any other splat as a build_vector.
21758 SDValue Splatted = V->getOperand(SplatIndex);
21759 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21760 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21762 // We may have jumped through bitcasts, so the type of the
21763 // BUILD_VECTOR may not match the type of the shuffle.
21764 if (V->getValueType(0) != VT)
21765 NewBV = DAG.getBitcast(VT, NewBV);
21766 return NewBV;
21770 // Simplify source operands based on shuffle mask.
21771 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21772 return SDValue(N, 0);
21774 // This is intentionally placed after demanded elements simplification because
21775 // it could eliminate knowledge of undef elements created by this shuffle.
21776 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21777 return ShufOp;
21779 // Match shuffles that can be converted to any_vector_extend_in_reg.
21780 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21781 return V;
21783 // Combine "truncate_vector_in_reg" style shuffles.
21784 if (SDValue V = combineTruncationShuffle(SVN, DAG))
21785 return V;
21787 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21788 Level < AfterLegalizeVectorOps &&
21789 (N1.isUndef() ||
21790 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21791 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21792 if (SDValue V = partitionShuffleOfConcats(N, DAG))
21793 return V;
21796 // A shuffle of a concat of the same narrow vector can be reduced to use
21797 // only low-half elements of a concat with undef:
21798 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
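// e.g. with v2i32 X (illustrative):
//   shuf (v4i32 concat X, X), undef, <3,0,u,2>
//     --> shuf (v4i32 concat X, undef), undef, <1,0,u,0>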
21799 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21800 N0.getNumOperands() == 2 &&
21801 N0.getOperand(0) == N0.getOperand(1)) {
21802 int HalfNumElts = (int)NumElts / 2;
21803 SmallVector<int, 8> NewMask;
21804 for (unsigned i = 0; i != NumElts; ++i) {
21805 int Idx = SVN->getMaskElt(i);
21806 if (Idx >= HalfNumElts) {
21807 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21808 Idx -= HalfNumElts;
21810 NewMask.push_back(Idx);
21812 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21813 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21814 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21815 N0.getOperand(0), UndefVec);
21816 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21820 // See if we can replace a shuffle with an insert_subvector.
21821 // e.g. v2i32 into v8i32:
21822 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21823 // --> insert_subvector(lhs,rhs1,4).
21824 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21825 TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21826 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21827 // Ensure RHS subvectors are legal.
21828 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21829 EVT SubVT = RHS.getOperand(0).getValueType();
21830 int NumSubVecs = RHS.getNumOperands();
21831 int NumSubElts = SubVT.getVectorNumElements();
21832 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21833 if (!TLI.isTypeLegal(SubVT))
21834 return SDValue();
21836 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
21837 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21838 return SDValue();
21840 // Search [NumSubElts] spans for RHS sequence.
21841 // TODO: Can we avoid nested loops to increase performance?
21842 SmallVector<int> InsertionMask(NumElts);
21843 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21844 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21845 // Reset mask to identity.
21846 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21848 // Add subvector insertion.
21849 std::iota(InsertionMask.begin() + SubIdx,
21850 InsertionMask.begin() + SubIdx + NumSubElts,
21851 NumElts + (SubVec * NumSubElts));
21853 // See if the shuffle mask matches the reference insertion mask.
21854 bool MatchingShuffle = true;
21855 for (int i = 0; i != (int)NumElts; ++i) {
21856 int ExpectIdx = InsertionMask[i];
21857 int ActualIdx = Mask[i];
21858 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21859 MatchingShuffle = false;
21860 break;
21864 if (MatchingShuffle)
21865 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21866 RHS.getOperand(SubVec),
21867 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21870 return SDValue();
21872 ArrayRef<int> Mask = SVN->getMask();
21873 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21874 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21875 return InsertN1;
21876 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21877 SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21878 ShuffleVectorSDNode::commuteMask(CommuteMask);
21879 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21880 return InsertN0;
21884 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21885 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21886 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21887 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21888 return Res;
21890 // If this shuffle only has a single input that is a bitcasted shuffle,
21891 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21892 // back to their original types.
21893 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21894 N1.isUndef() && Level < AfterLegalizeVectorOps &&
21895 TLI.isTypeLegal(VT)) {
21897 SDValue BC0 = peekThroughOneUseBitcasts(N0);
21898 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21899 EVT SVT = VT.getScalarType();
21900 EVT InnerVT = BC0->getValueType(0);
21901 EVT InnerSVT = InnerVT.getScalarType();
21903 // Determine which shuffle works with the smaller scalar type.
21904 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21905 EVT ScaleSVT = ScaleVT.getScalarType();
21907 if (TLI.isTypeLegal(ScaleVT) &&
21908 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21909 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21910 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21911 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21913 // Scale the shuffle masks to the smaller scalar type.
21914 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21915 SmallVector<int, 8> InnerMask;
21916 SmallVector<int, 8> OuterMask;
21917 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21918 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21920 // Merge the shuffle masks.
21921 SmallVector<int, 8> NewMask;
21922 for (int M : OuterMask)
21923 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21925 // Test for shuffle mask legality over both commutations.
21926 SDValue SV0 = BC0->getOperand(0);
21927 SDValue SV1 = BC0->getOperand(1);
21928 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21929 if (!LegalMask) {
21930 std::swap(SV0, SV1);
21931 ShuffleVectorSDNode::commuteMask(NewMask);
21932 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21935 if (LegalMask) {
21936 SV0 = DAG.getBitcast(ScaleVT, SV0);
21937 SV1 = DAG.getBitcast(ScaleVT, SV1);
21938 return DAG.getBitcast(
21939 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21945 // Compute the combined shuffle mask for a shuffle with SV0 as the first
21946 // operand, and SV1 as the second operand.
21947 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21948 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21949 auto MergeInnerShuffle =
21950 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21951 ShuffleVectorSDNode *OtherSVN, SDValue N1,
21952 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21953 SmallVectorImpl<int> &Mask) -> bool {
21954 // Don't try to fold splats; they're likely to simplify somehow, or they
21955 // might be free.
21956 if (OtherSVN->isSplat())
21957 return false;
21959 SV0 = SV1 = SDValue();
21960 Mask.clear();
21962 for (unsigned i = 0; i != NumElts; ++i) {
21963 int Idx = SVN->getMaskElt(i);
21964 if (Idx < 0) {
21965 // Propagate Undef.
21966 Mask.push_back(Idx);
21967 continue;
21970 if (Commute)
21971 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21973 SDValue CurrentVec;
21974 if (Idx < (int)NumElts) {
21975 // This shuffle index refers to the inner shuffle N0. Lookup the inner
21976 // shuffle mask to identify which vector is actually referenced.
21977 Idx = OtherSVN->getMaskElt(Idx);
21978 if (Idx < 0) {
21979 // Propagate Undef.
21980 Mask.push_back(Idx);
21981 continue;
21983 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21984 : OtherSVN->getOperand(1);
21985 } else {
21986 // This shuffle index references an element within N1.
21987 CurrentVec = N1;
21990 // Simple case where 'CurrentVec' is UNDEF.
21991 if (CurrentVec.isUndef()) {
21992 Mask.push_back(-1);
21993 continue;
21996 // Canonicalize the shuffle index. We don't know yet if CurrentVec
21997 // will be the first or second operand of the combined shuffle.
21998 Idx = Idx % NumElts;
21999 if (!SV0.getNode() || SV0 == CurrentVec) {
22000 // Ok. CurrentVec is the left hand side.
22001 // Update the mask accordingly.
22002 SV0 = CurrentVec;
22003 Mask.push_back(Idx);
22004 continue;
22006 if (!SV1.getNode() || SV1 == CurrentVec) {
22007 // Ok. CurrentVec is the right hand side.
22008 // Update the mask accordingly.
22009 SV1 = CurrentVec;
22010 Mask.push_back(Idx + NumElts);
22011 continue;
22014 // Last chance - see if the vector is another shuffle and if it
22015 // uses one of the existing candidate shuffle ops.
22016 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22017 int InnerIdx = CurrentSVN->getMaskElt(Idx);
22018 if (InnerIdx < 0) {
22019 Mask.push_back(-1);
22020 continue;
22022 SDValue InnerVec = (InnerIdx < (int)NumElts)
22023 ? CurrentSVN->getOperand(0)
22024 : CurrentSVN->getOperand(1);
22025 if (InnerVec.isUndef()) {
22026 Mask.push_back(-1);
22027 continue;
22029 InnerIdx %= NumElts;
22030 if (InnerVec == SV0) {
22031 Mask.push_back(InnerIdx);
22032 continue;
22034 if (InnerVec == SV1) {
22035 Mask.push_back(InnerIdx + NumElts);
22036 continue;
22040 // Bail out if we cannot convert the shuffle pair into a single shuffle.
22041 return false;
22044 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22045 return true;
22047 // Avoid introducing shuffles with illegal mask.
22048 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22049 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22050 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22051 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22052 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22053 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22054 if (TLI.isShuffleMaskLegal(Mask, VT))
22055 return true;
22057 std::swap(SV0, SV1);
22058 ShuffleVectorSDNode::commuteMask(Mask);
22059 return TLI.isShuffleMaskLegal(Mask, VT);
22062 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22063 // Canonicalize shuffles according to rules:
22064 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22065 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22066 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22067 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22068 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22069 // The incoming shuffle must be of the same type as the result of the
22070 // current shuffle.
22071 assert(N1->getOperand(0).getValueType() == VT &&
22072 "Shuffle types don't match");
22074 SDValue SV0 = N1->getOperand(0);
22075 SDValue SV1 = N1->getOperand(1);
22076 bool HasSameOp0 = N0 == SV0;
22077 bool IsSV1Undef = SV1.isUndef();
22078 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22079 // Commute the operands of this shuffle so merging below will trigger.
22080 return DAG.getCommutedVectorShuffle(*SVN);
22083 // Canonicalize splat shuffles to the RHS to improve merging below.
22084 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22085 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22086 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22087 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22088 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22089 return DAG.getCommutedVectorShuffle(*SVN);
22092 // Try to fold according to rules:
22093 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22094 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22095 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22096 // Don't try to fold shuffles with illegal type.
22097 // Only fold if this shuffle is the only user of the other shuffle.
22098 // Try matching commuted shuffle(C, shuffle(A,B)) patterns as well.
22099 for (int i = 0; i != 2; ++i) {
22100 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22101 N->isOnlyUserOf(N->getOperand(i).getNode())) {
22102 // The incoming shuffle must be of the same type as the result of the
22103 // current shuffle.
22104 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22105 assert(OtherSV->getOperand(0).getValueType() == VT &&
22106 "Shuffle types don't match");
22108 SDValue SV0, SV1;
22109 SmallVector<int, 4> Mask;
22110 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22111 SV0, SV1, Mask)) {
22112 // Check if all indices in Mask are Undef. If so, propagate Undef.
22113 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22114 return DAG.getUNDEF(VT);
22116 return DAG.getVectorShuffle(VT, SDLoc(N),
22117 SV0 ? SV0 : DAG.getUNDEF(VT),
22118 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22123 // Merge shuffles through binops if we are able to merge them with at least
22124 // one other shuffle.
22125 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22126 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22127 unsigned SrcOpcode = N0.getOpcode();
22128 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22129 (N1.isUndef() ||
22130 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22131 // Get binop source ops, or just pass on the undef.
22132 SDValue Op00 = N0.getOperand(0);
22133 SDValue Op01 = N0.getOperand(1);
22134 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22135 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22136 // TODO: We might be able to relax the VT check but we don't currently
22137 // have any isBinOp() that has different result/ops VTs so play safe until
22138 // we have test coverage.
22139 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22140 Op01.getValueType() == VT && Op11.getValueType() == VT &&
22141 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22142 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22143 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22144 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22145 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22146 SmallVectorImpl<int> &Mask, bool LeftOp,
22147 bool Commute) {
22148 SDValue InnerN = Commute ? N1 : N0;
22149 SDValue Op0 = LeftOp ? Op00 : Op01;
22150 SDValue Op1 = LeftOp ? Op10 : Op11;
22151 if (Commute)
22152 std::swap(Op0, Op1);
22153 // Only accept the merged shuffle if we don't introduce undef elements,
22154 // or the inner shuffle already contained undef elements.
22155 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22156 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22157 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22158 Mask) &&
22159 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22160 llvm::none_of(Mask, [](int M) { return M < 0; }));
22163 // Ensure we don't increase the number of shuffles - we must merge a
22164 // shuffle from at least one of the LHS and RHS ops.
22165 bool MergedLeft = false;
22166 SDValue LeftSV0, LeftSV1;
22167 SmallVector<int, 4> LeftMask;
22168 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22169 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22170 MergedLeft = true;
22171 } else {
22172 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22173 LeftSV0 = Op00, LeftSV1 = Op10;
22176 bool MergedRight = false;
22177 SDValue RightSV0, RightSV1;
22178 SmallVector<int, 4> RightMask;
22179 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22180 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22181 MergedRight = true;
22182 } else {
22183 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22184 RightSV0 = Op01, RightSV1 = Op11;
22187 if (MergedLeft || MergedRight) {
22188 SDLoc DL(N);
22189 SDValue LHS = DAG.getVectorShuffle(
22190 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22191 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22192 SDValue RHS = DAG.getVectorShuffle(
22193 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22194 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22195 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22201 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22202 return V;
22204 return SDValue();
22207 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22208 SDValue InVal = N->getOperand(0);
22209 EVT VT = N->getValueType(0);
22211 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22212 // with a VECTOR_SHUFFLE and possible truncate.
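// For example, with a hypothetical v4i32 vector V:
//   scalar_to_vector(extract_vector_elt(V, 2))
// can become shuffle(V, undef, <2,-1,-1,-1>), avoiding the round trip of the
// element through a scalar register.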
22213 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22214 VT.isFixedLengthVector() &&
22215 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22216 SDValue InVec = InVal->getOperand(0);
22217 SDValue EltNo = InVal->getOperand(1);
22218 auto InVecT = InVec.getValueType();
22219 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22220 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22221 int Elt = C0->getZExtValue();
22222 NewMask[0] = Elt;
22223 // If we have an implicit truncate, do the truncate here as long as it's
22224 // legal; if it's not legal, skip this fold.
22225 if (VT.getScalarType() != InVal.getValueType() &&
22226 InVal.getValueType().isScalarInteger() &&
22227 isTypeLegal(VT.getScalarType())) {
22228 SDValue Val =
22229 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22230 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22232 if (VT.getScalarType() == InVecT.getScalarType() &&
22233 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22234 SDValue LegalShuffle =
22235 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22236 DAG.getUNDEF(InVecT), NewMask, DAG);
22237 if (LegalShuffle) {
22238 // If the initial vector is the correct size this shuffle is a
22239 // valid result.
22240 if (VT == InVecT)
22241 return LegalShuffle;
22242 // If not we must truncate the vector.
22243 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22244 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22245 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22246 InVecT.getVectorElementType(),
22247 VT.getVectorNumElements());
22248 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22249 LegalShuffle, ZeroIdx);
22256 return SDValue();
22259 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22260 EVT VT = N->getValueType(0);
22261 SDValue N0 = N->getOperand(0);
22262 SDValue N1 = N->getOperand(1);
22263 SDValue N2 = N->getOperand(2);
22264 uint64_t InsIdx = N->getConstantOperandVal(2);
22266 // If inserting an UNDEF, just return the original vector.
22267 if (N1.isUndef())
22268 return N0;
22270 // If this is an insert of an extracted vector into an undef vector, we can
22271 // just use the input to the extract.
22272 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22273 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22274 return N1.getOperand(0);
22276 // If we are inserting a bitcast value into an undef, with the same
22277 // number of elements, just use the bitcast input of the extract.
22278 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
22279 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
22280 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22281 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22282 N1.getOperand(0).getOperand(1) == N2 &&
22283 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22284 VT.getVectorElementCount() &&
22285 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22286 VT.getSizeInBits()) {
22287 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22290 // If both N1 and N2 are bitcast values on which insert_subvector
22291 // would make sense, pull the bitcast through.
22292 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22293 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22294 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22295 SDValue CN0 = N0.getOperand(0);
22296 SDValue CN1 = N1.getOperand(0);
22297 EVT CN0VT = CN0.getValueType();
22298 EVT CN1VT = CN1.getValueType();
22299 if (CN0VT.isVector() && CN1VT.isVector() &&
22300 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22301 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22302 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22303 CN0.getValueType(), CN0, CN1, N2);
22304 return DAG.getBitcast(VT, NewINSERT);
22308 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22309 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22310 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22311 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22312 N0.getOperand(1).getValueType() == N1.getValueType() &&
22313 N0.getOperand(2) == N2)
22314 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22315 N1, N2);
22317 // Eliminate an intermediate insert into an undef vector:
22318 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22319 // insert_subvector undef, X, N2
22320 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22321 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22322 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22323 N1.getOperand(1), N2);
22325 // Push subvector bitcasts to the output, adjusting the index as we go.
22326 // insert_subvector(bitcast(v), bitcast(s), c1)
22327 // -> bitcast(insert_subvector(v, s, c2))
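// For example, with hypothetical types V : v4i32 and S : v2i32:
//   insert_subvector(bitcast V to v8i16, bitcast S to v4i16, 4)
// can be rewritten as
//   bitcast (insert_subvector(V, S, 2)) to v8i16
// where index 4 in i16 elements becomes index 2 in i32 elements.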
22328 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22329 N1.getOpcode() == ISD::BITCAST) {
22330 SDValue N0Src = peekThroughBitcasts(N0);
22331 SDValue N1Src = peekThroughBitcasts(N1);
22332 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22333 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22334 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22335 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22336 EVT NewVT;
22337 SDLoc DL(N);
22338 SDValue NewIdx;
22339 LLVMContext &Ctx = *DAG.getContext();
22340 ElementCount NumElts = VT.getVectorElementCount();
22341 unsigned EltSizeInBits = VT.getScalarSizeInBits();
22342 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22343 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22344 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22345 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22346 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22347 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22348 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22349 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22350 NumElts.divideCoefficientBy(Scale));
22351 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22354 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22355 SDValue Res = DAG.getBitcast(NewVT, N0Src);
22356 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22357 return DAG.getBitcast(VT, Res);
22362 // Canonicalize insert_subvector dag nodes.
22363 // Example:
22364 // (insert_subvector (insert_subvector A, Idx0), Idx1)
22365 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
22366 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22367 N1.getValueType() == N0.getOperand(1).getValueType()) {
22368 unsigned OtherIdx = N0.getConstantOperandVal(2);
22369 if (InsIdx < OtherIdx) {
22370 // Swap nodes.
22371 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22372 N0.getOperand(0), N1, N2);
22373 AddToWorklist(NewOp.getNode());
22374 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22375 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22379 // If the input vector is a concatenation, and the insert replaces
22380 // one of the pieces, we can optimize into a single concat_vectors.
22381 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22382 N0.getOperand(0).getValueType() == N1.getValueType() &&
22383 N0.getOperand(0).getValueType().isScalableVector() ==
22384 N1.getValueType().isScalableVector()) {
22385 unsigned Factor = N1.getValueType().getVectorMinNumElements();
22386 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22387 Ops[InsIdx / Factor] = N1;
22388 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22391 // Simplify source operands based on insertion.
22392 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22393 return SDValue(N, 0);
22395 return SDValue();
22398 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22399 SDValue N0 = N->getOperand(0);
22401 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22402 if (N0->getOpcode() == ISD::FP16_TO_FP)
22403 return N0->getOperand(0);
22405 return SDValue();
22408 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22409 SDValue N0 = N->getOperand(0);
22411 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22412 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22413 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22414 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22415 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22416 N0.getOperand(0));
22420 return SDValue();
22423 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22424 SDValue N0 = N->getOperand(0);
22425 EVT VT = N0.getValueType();
22426 unsigned Opcode = N->getOpcode();
22428 // VECREDUCE over 1-element vector is just an extract.
22429 if (VT.getVectorElementCount().isScalar()) {
22430 SDLoc dl(N);
22431 SDValue Res =
22432 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22433 DAG.getVectorIdxConstant(0, dl));
22434 if (Res.getValueType() != N->getValueType(0))
22435 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22436 return Res;
22439 // On a boolean vector an and/or reduction is the same as a umin/umax
22440 // reduction. Convert them if the latter is legal while the former isn't.
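// Rationale: when every element is known to be 0 or all-ones (all sign bits
// set), the umin of the elements is 0 iff any element is 0, matching the AND
// reduction, and the umax is all-ones iff any element is all-ones, matching
// the OR reduction.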
22441 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22442 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22443 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22444 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22445 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22446 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22447 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22450 return SDValue();
22453 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22454 // VP operations in which all vector elements are disabled - either by
22455 // determining that the mask is all false or that the EVL is 0 - can be
22456 // eliminated.
22457 bool AreAllEltsDisabled = false;
22458 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22459 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22460 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22461 AreAllEltsDisabled |=
22462 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22464 // This is the only generic VP combine we support for now.
22465 if (!AreAllEltsDisabled)
22466 return SDValue();
22468 // Binary operations can be replaced by UNDEF.
22469 if (ISD::isVPBinaryOp(N->getOpcode()))
22470 return DAG.getUNDEF(N->getValueType(0));
22472 // VP Memory operations can be replaced by either the chain (stores) or the
22473 // chain + undef (loads).
22474 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22475 if (MemSD->writeMem())
22476 return MemSD->getChain();
22477 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22480 // Reduction operations return the start operand when no elements are active.
22481 if (ISD::isVPReduction(N->getOpcode()))
22482 return N->getOperand(0);
22484 return SDValue();
22487 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
22488 /// with the destination vector and a zero vector.
22489 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22490 /// vector_shuffle V, Zero, <0, 4, 2, 4>
22491 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22492 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22494 EVT VT = N->getValueType(0);
22495 SDValue LHS = N->getOperand(0);
22496 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22497 SDLoc DL(N);
22499 // Make sure we're not running after operation legalization where it
22500 // may have custom lowered the vector shuffles.
22501 if (LegalOperations)
22502 return SDValue();
22504 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22505 return SDValue();
22507 EVT RVT = RHS.getValueType();
22508 unsigned NumElts = RHS.getNumOperands();
22510 // Attempt to create a valid clear mask, splitting the mask into
22511 // sub elements and checking to see if each is
22512 // all zeros or all ones - suitable for shuffle masking.
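// For example, a hypothetical v4i32 AND mask
//   <0xFFFF0000, 0xFFFFFFFF, 0, 0x0000FFFF>
// is not a clear mask at i32 granularity, but at Split == 2 every 16-bit half
// is all zeros or all ones, so it can become a v8i16 shuffle with zero.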
22513 auto BuildClearMask = [&](int Split) {
22514 int NumSubElts = NumElts * Split;
22515 int NumSubBits = RVT.getScalarSizeInBits() / Split;
22517 SmallVector<int, 8> Indices;
22518 for (int i = 0; i != NumSubElts; ++i) {
22519 int EltIdx = i / Split;
22520 int SubIdx = i % Split;
22521 SDValue Elt = RHS.getOperand(EltIdx);
22522 // X & undef --> 0 (not undef). So this lane must be converted to choose
22523 // from the zero constant vector (same as if the element had all 0-bits).
22524 if (Elt.isUndef()) {
22525 Indices.push_back(i + NumSubElts);
22526 continue;
22529 APInt Bits;
22530 if (isa<ConstantSDNode>(Elt))
22531 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22532 else if (isa<ConstantFPSDNode>(Elt))
22533 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22534 else
22535 return SDValue();
22537 // Extract the sub element from the constant bit mask.
22538 if (DAG.getDataLayout().isBigEndian())
22539 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22540 else
22541 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22543 if (Bits.isAllOnes())
22544 Indices.push_back(i);
22545 else if (Bits == 0)
22546 Indices.push_back(i + NumSubElts);
22547 else
22548 return SDValue();
22551 // Let's see if the target supports this vector_shuffle.
22552 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22553 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22554 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22555 return SDValue();
22557 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22558 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22559 DAG.getBitcast(ClearVT, LHS),
22560 Zero, Indices));
22563 // Determine maximum split level (byte level masking).
22564 int MaxSplit = 1;
22565 if (RVT.getScalarSizeInBits() % 8 == 0)
22566 MaxSplit = RVT.getScalarSizeInBits() / 8;
22568 for (int Split = 1; Split <= MaxSplit; ++Split)
22569 if (RVT.getScalarSizeInBits() % Split == 0)
22570 if (SDValue S = BuildClearMask(Split))
22571 return S;
22573 return SDValue();
22576 /// If a vector binop is performed on splat values, it may be profitable to
22577 /// extract, scalarize, and insert/splat.
22578 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
22579 const SDLoc &DL) {
22580 SDValue N0 = N->getOperand(0);
22581 SDValue N1 = N->getOperand(1);
22582 unsigned Opcode = N->getOpcode();
22583 EVT VT = N->getValueType(0);
22584 EVT EltVT = VT.getVectorElementType();
22585 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22587 // TODO: Remove/replace the extract cost check? If the elements are available
22588 // as scalars, then there may be no extract cost. Should we ask if
22589 // inserting a scalar back into a vector is cheap instead?
22590 int Index0, Index1;
22591 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22592 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22593 if (!Src0 || !Src1 || Index0 != Index1 ||
22594 Src0.getValueType().getVectorElementType() != EltVT ||
22595 Src1.getValueType().getVectorElementType() != EltVT ||
22596 !TLI.isExtractVecEltCheap(VT, Index0) ||
22597 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22598 return SDValue();
22600 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22601 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22602 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22603 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22605 // If all lanes but 1 are undefined, no need to splat the scalar result.
22606 // TODO: Keep track of undefs and use that info in the general case.
22607 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
22608 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
22609 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
22610 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
22611 // build_vec ..undef, (bo X, Y), undef...
22612 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
22613 Ops[Index0] = ScalarBO;
22614 return DAG.getBuildVector(VT, DL, Ops);
22617 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
22618 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
22619 return DAG.getBuildVector(VT, DL, Ops);
22622 /// Visit a binary vector operation, like ADD.
22623 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
22624 EVT VT = N->getValueType(0);
22625 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
22627 SDValue LHS = N->getOperand(0);
22628 SDValue RHS = N->getOperand(1);
22629 unsigned Opcode = N->getOpcode();
22630 SDNodeFlags Flags = N->getFlags();
22632 // Move unary shuffles with identical masks after a vector binop:
22633 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22634 // --> shuffle (VBinOp A, B), Undef, Mask
22635 // This does not require type legality checks because we are creating the
22636 // same types of operations that are in the original sequence. We do have to
22637 // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
22638 // though. This code is adapted from the identical transform in instcombine.
22639 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22640 Opcode != ISD::UREM && Opcode != ISD::SREM &&
22641 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22642 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22643 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22644 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22645 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22646 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22647 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22648 RHS.getOperand(0), Flags);
22649 SDValue UndefV = LHS.getOperand(1);
22650 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22653 // Try to sink a splat shuffle after a binop with a uniform constant.
22654 // This is limited to cases where neither the shuffle nor the constant have
22655 // undefined elements because that could be poison-unsafe or inhibit
22656 // demanded elements analysis. It is further limited to not change a splat
22657 // of an inserted scalar because that may be optimized better by
22658 // load-folding or other target-specific behaviors.
22659 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22660 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22661 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22662 // binop (splat X), (splat C) --> splat (binop X, C)
22663 SDValue X = Shuf0->getOperand(0);
22664 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22665 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22666 Shuf0->getMask());
22668 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22669 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22670 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22671 // binop (splat C), (splat X) --> splat (binop C, X)
22672 SDValue X = Shuf1->getOperand(0);
22673 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22674 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22675 Shuf1->getMask());
22679 // The following pattern is likely to emerge with vector reduction ops. Moving
22680 // the binary operation ahead of insertion may allow using a narrower vector
22681 // instruction that has better performance than the wide version of the op:
22682 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22683 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22684 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22685 LHS.getOperand(2) == RHS.getOperand(2) &&
22686 (LHS.hasOneUse() || RHS.hasOneUse())) {
22687 SDValue X = LHS.getOperand(1);
22688 SDValue Y = RHS.getOperand(1);
22689 SDValue Z = LHS.getOperand(2);
22690 EVT NarrowVT = X.getValueType();
22691 if (NarrowVT == Y.getValueType() &&
22692 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22693 LegalOperations)) {
22694 // (binop undef, undef) may not return undef, so compute that result.
22695 SDValue VecC =
22696 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22697 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22698 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22702 // Make sure all but the first op are undef or constant.
22703 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22704 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22705 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22706 return Op.isUndef() ||
22707 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22711 // The following pattern is likely to emerge with vector reduction ops. Moving
22712 // the binary operation ahead of the concat may allow using a narrower vector
22713 // instruction that has better performance than the wide version of the op:
22714 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22715 // concat (VBinOp X, Y), VecC
22716 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22717 (LHS.hasOneUse() || RHS.hasOneUse())) {
22718 EVT NarrowVT = LHS.getOperand(0).getValueType();
22719 if (NarrowVT == RHS.getOperand(0).getValueType() &&
22720 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22721 unsigned NumOperands = LHS.getNumOperands();
22722 SmallVector<SDValue, 4> ConcatOps;
22723 for (unsigned i = 0; i != NumOperands; ++i) {
22724 // This constant folds for operands 1 and up.
22725 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22726 RHS.getOperand(i)));
22729 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22733 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
22734 return V;
22736 return SDValue();
22739 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22740 SDValue N2) {
22741 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22743 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22744 cast<CondCodeSDNode>(N0.getOperand(2))->get());
22746 // If we got a simplified select_cc node back from SimplifySelectCC, then
22747 // break it down into a new SETCC node, and a new SELECT node, and then return
22748 // the SELECT node, since we were called with a SELECT node.
22749 if (SCC.getNode()) {
22750 // Check to see if we got a select_cc back (to turn into setcc/select).
22751 // Otherwise, just return whatever node we got back, like fabs.
22752 if (SCC.getOpcode() == ISD::SELECT_CC) {
22753 const SDNodeFlags Flags = N0.getNode()->getFlags();
22754 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22755 N0.getValueType(),
22756 SCC.getOperand(0), SCC.getOperand(1),
22757 SCC.getOperand(4), Flags);
22758 AddToWorklist(SETCC.getNode());
22759 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22760 SCC.getOperand(2), SCC.getOperand(3));
22761 SelectNode->setFlags(Flags);
22762 return SelectNode;
22765 return SCC;
22767 return SDValue();
22770 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22771 /// being selected between, see if we can simplify the select. Callers of this
22772 /// should assume that TheSelect is deleted if this returns true. As such, they
22773 /// should return the appropriate thing (e.g. the node) back to the top-level of
22774 /// the DAG combiner loop to avoid it being looked at.
22775 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22776 SDValue RHS) {
22777 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22778 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22779 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22780 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22781 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22782 SDValue Sqrt = RHS;
22783 ISD::CondCode CC;
22784 SDValue CmpLHS;
22785 const ConstantFPSDNode *Zero = nullptr;
22787 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22788 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22789 CmpLHS = TheSelect->getOperand(0);
22790 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22791 } else {
22792 // SELECT or VSELECT
22793 SDValue Cmp = TheSelect->getOperand(0);
22794 if (Cmp.getOpcode() == ISD::SETCC) {
22795 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22796 CmpLHS = Cmp.getOperand(0);
22797 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22800 if (Zero && Zero->isZero() &&
22801 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22802 CC == ISD::SETULT || CC == ISD::SETLT)) {
22803 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22804 CombineTo(TheSelect, Sqrt);
22805 return true;
22809 // Cannot simplify select with vector condition
22810 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22812 // If this is a select from two identical things, try to pull the operation
22813 // through the select.
22814 if (LHS.getOpcode() != RHS.getOpcode() ||
22815 !LHS.hasOneUse() || !RHS.hasOneUse())
22816 return false;
22818 // If this is a load and the token chain is identical, replace the select
22819 // of two loads with a load through a select of the address to load from.
22820 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22821 // constants have been dropped into the constant pool.
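// Illustratively:
//   select Cond, (load Chain Ptr0), (load Chain Ptr1)
//     --> load Chain (select Cond, Ptr0, Ptr1)
// subject to the checks below (simple, non-indexed, matching memory types,
// default address space, and no dependence cycles through the condition).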
22822 if (LHS.getOpcode() == ISD::LOAD) {
22823 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22824 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22826 // Token chains must be identical.
22827 if (LHS.getOperand(0) != RHS.getOperand(0) ||
22828 // Do not let this transformation reduce the number of volatile loads.
22829 // Be conservative for atomics for the moment
22830 // TODO: This does appear to be legal for unordered atomics (see D66309)
22831 !LLD->isSimple() || !RLD->isSimple() ||
22832 // FIXME: If either is a pre/post inc/dec load,
22833 // we'd need to split out the address adjustment.
22834 LLD->isIndexed() || RLD->isIndexed() ||
22835 // If this is an EXTLOAD, the VT's must match.
22836 LLD->getMemoryVT() != RLD->getMemoryVT() ||
22837 // If this is an EXTLOAD, the kind of extension must match.
22838 (LLD->getExtensionType() != RLD->getExtensionType() &&
22839 // The only exception is if one of the extensions is anyext.
22840 LLD->getExtensionType() != ISD::EXTLOAD &&
22841 RLD->getExtensionType() != ISD::EXTLOAD) ||
22842 // FIXME: this discards src value information. This is
22843 // over-conservative. It would be beneficial to be able to remember
22844 // both potential memory locations. Since we are discarding
22845 // src value info, don't do the transformation if the memory
22846 // locations are not in the default address space.
22847 LLD->getPointerInfo().getAddrSpace() != 0 ||
22848 RLD->getPointerInfo().getAddrSpace() != 0 ||
22849 // We can't produce a CMOV of a TargetFrameIndex since we won't
22850 // generate the address generation required.
22851 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22852 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22853 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22854 LLD->getBasePtr().getValueType()))
22855 return false;
22857 // The loads must not depend on one another.
22858 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22859 return false;
22861 // Check that the select condition doesn't reach either load. If so,
22862 // folding this will induce a cycle into the DAG. If not, this is safe to
22863 // xform, so create a select of the addresses.
22865 SmallPtrSet<const SDNode *, 32> Visited;
22866 SmallVector<const SDNode *, 16> Worklist;
22868 // Always fail if LLD and RLD are not independent. TheSelect is a
22869 // predecessor to all Nodes in question so we need not search past it.
22871 Visited.insert(TheSelect);
22872 Worklist.push_back(LLD);
22873 Worklist.push_back(RLD);
22875 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22876 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22877 return false;
22879 SDValue Addr;
22880 if (TheSelect->getOpcode() == ISD::SELECT) {
22881 // We cannot do this optimization if any pair of {RLD, LLD} is a
22882 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22883 // Loads, we only need to check if CondNode is a successor to one of the
22884 // loads. We can further avoid this if there's no use of their chain
22885 // value.
22886 SDNode *CondNode = TheSelect->getOperand(0).getNode();
22887 Worklist.push_back(CondNode);
22889 if ((LLD->hasAnyUseOfValue(1) &&
22890 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22891 (RLD->hasAnyUseOfValue(1) &&
22892 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22893 return false;
22895 Addr = DAG.getSelect(SDLoc(TheSelect),
22896 LLD->getBasePtr().getValueType(),
22897 TheSelect->getOperand(0), LLD->getBasePtr(),
22898 RLD->getBasePtr());
22899 } else { // Otherwise SELECT_CC
22900 // We cannot do this optimization if any pair of {RLD, LLD} is a
22901 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22902 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22903 // one of the loads. We can further avoid this if there's no use of their
22904 // chain value.
22906 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22907 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22908 Worklist.push_back(CondLHS);
22909 Worklist.push_back(CondRHS);
22911 if ((LLD->hasAnyUseOfValue(1) &&
22912 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22913 (RLD->hasAnyUseOfValue(1) &&
22914 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22915 return false;
22917 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22918 LLD->getBasePtr().getValueType(),
22919 TheSelect->getOperand(0),
22920 TheSelect->getOperand(1),
22921 LLD->getBasePtr(), RLD->getBasePtr(),
22922 TheSelect->getOperand(4));
22925 SDValue Load;
22926 // It is safe to replace the two loads if they have different alignments,
22927 // but the new load must be the minimum (most restrictive) alignment of the
22928 // inputs.
22929 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22930 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22931 if (!RLD->isInvariant())
22932 MMOFlags &= ~MachineMemOperand::MOInvariant;
22933 if (!RLD->isDereferenceable())
22934 MMOFlags &= ~MachineMemOperand::MODereferenceable;
22935 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22936 // FIXME: Discards pointer and AA info.
22937 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22938 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22939 MMOFlags);
22940 } else {
22941 // FIXME: Discards pointer and AA info.
22942 Load = DAG.getExtLoad(
22943 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22944 : LLD->getExtensionType(),
22945 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22946 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22949 // Users of the select now use the result of the load.
22950 CombineTo(TheSelect, Load);
22952 // Users of the old loads now use the new load's chain. We know the
22953 // old-load value is dead now.
22954 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22955 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22956 return true;
22959 return false;
22962 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22963 /// bitwise 'and'.
22964 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22965 SDValue N1, SDValue N2, SDValue N3,
22966 ISD::CondCode CC) {
22967 // If this is a select where the false operand is zero and the compare is a
22968 // check of the sign bit, see if we can perform the "gzip trick":
22969 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22970 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
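// For instance, with i32 values, (sra X, 31) is all-ones when X is negative
// and zero otherwise, so
//   select_cc setlt X, 0, A, 0  ==  and (sra X, 31), A
// while the setgt form additionally inverts that mask (the 'not' above).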
22971 EVT XType = N0.getValueType();
22972 EVT AType = N2.getValueType();
22973 if (!isNullConstant(N3) || !XType.bitsGE(AType))
22974 return SDValue();
22976 // If the comparison is testing for a positive value, we have to invert
22977 // the sign bit mask, so only do that transform if the target has a bitwise
22978 // 'and not' instruction (the invert is free).
22979 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22980 // (X > -1) ? A : 0
22981 // (X > 0) ? X : 0 <-- This is canonical signed max.
22982 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22983 return SDValue();
22984 } else if (CC == ISD::SETLT) {
22985 // (X < 0) ? A : 0
22986 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
22987 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22988 return SDValue();
22989 } else {
22990 return SDValue();
22993 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22994 // constant.
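// For example, in the setlt form with an i32 X and A == 8 (only bit 3 set),
// ShCt is 32 - 3 - 1 == 28 and the fold produces (and (srl X, 28), 8): bit 31
// of X lands in bit 3 of the shifted value, so the result is 8 when X is
// negative and 0 otherwise.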
22995 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22996 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22997 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22998 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22999 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23000 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23001 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23002 AddToWorklist(Shift.getNode());
23004 if (XType.bitsGT(AType)) {
23005 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23006 AddToWorklist(Shift.getNode());
23009 if (CC == ISD::SETGT)
23010 Shift = DAG.getNOT(DL, Shift, AType);
23012 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23016 unsigned ShCt = XType.getSizeInBits() - 1;
23017 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23018 return SDValue();
23020 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23021 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23022 AddToWorklist(Shift.getNode());
23024 if (XType.bitsGT(AType)) {
23025 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23026 AddToWorklist(Shift.getNode());
23029 if (CC == ISD::SETGT)
23030 Shift = DAG.getNOT(DL, Shift, AType);
23032 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23035 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23036 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23037 SDValue N0 = N->getOperand(0);
23038 SDValue N1 = N->getOperand(1);
23039 SDValue N2 = N->getOperand(2);
23040 EVT VT = N->getValueType(0);
23041 SDLoc DL(N);
23043 unsigned BinOpc = N1.getOpcode();
23044 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23045 return SDValue();
23047 // The use checks are intentionally on SDNode because we may be dealing
23048 // with opcodes that produce more than one SDValue.
23049 // TODO: Do we really need to check N0 (the condition operand of the select)?
23050 // But removing that clause could cause an infinite loop...
23051 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23052 return SDValue();
23054 // Binops may include opcodes that return multiple values, so all values
23055 // must be created/propagated from the newly created binops below.
23056 SDVTList OpVTs = N1->getVTList();
23058 // Fold select(cond, binop(x, y), binop(z, y))
23059 // --> binop(select(cond, x, z), y)
23060 if (N1.getOperand(1) == N2.getOperand(1)) {
23061 SDValue NewSel =
23062 DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23063 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23064 NewBinOp->setFlags(N1->getFlags());
23065 NewBinOp->intersectFlagsWith(N2->getFlags());
23066 return NewBinOp;
23069 // Fold select(cond, binop(x, y), binop(x, z))
23070 // --> binop(x, select(cond, y, z))
23071 // Second op VT might be different (e.g. shift amount type)
23072 if (N1.getOperand(0) == N2.getOperand(0) &&
23073 VT == N1.getOperand(1).getValueType() &&
23074 VT == N2.getOperand(1).getValueType()) {
23075 SDValue NewSel =
23076 DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23077 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23078 NewBinOp->setFlags(N1->getFlags());
23079 NewBinOp->intersectFlagsWith(N2->getFlags());
23080 return NewBinOp;
23083 // TODO: Handle isCommutativeBinOp patterns as well?
23084 return SDValue();
23087 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23088 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23089 SDValue N0 = N->getOperand(0);
23090 EVT VT = N->getValueType(0);
23091 bool IsFabs = N->getOpcode() == ISD::FABS;
23092 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23094 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23095 return SDValue();
23097 SDValue Int = N0.getOperand(0);
23098 EVT IntVT = Int.getValueType();
23100 // The operand to cast should be integer.
23101 if (!IntVT.isInteger() || IntVT.isVector())
23102 return SDValue();
23104 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23105 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
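// Concretely, for a scalar f32 bitcast from i32 this becomes:
//   fneg: (bitconvert (xor x, 0x80000000))
//   fabs: (bitconvert (and x, 0x7FFFFFFF))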
23106 APInt SignMask;
23107 if (N0.getValueType().isVector()) {
23108 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23109 // 0x7f...) per element and splat it.
23110 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23111 if (IsFabs)
23112 SignMask = ~SignMask;
23113 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23114 } else {
23115 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23116 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23117 if (IsFabs)
23118 SignMask = ~SignMask;
23120 SDLoc DL(N0);
23121 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23122 DAG.getConstant(SignMask, DL, IntVT));
23123 AddToWorklist(Int.getNode());
23124 return DAG.getBitcast(VT, Int);
23127 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
23128 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23129 /// in it. This may be a win when the constant is not otherwise available
23130 /// because it replaces two constant pool loads with one.
23131 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23132 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23133 ISD::CondCode CC) {
23134 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23135 return SDValue();
23137 // If we are before legalize types, we want the other legalization to happen
23138 // first (for example, to avoid messing with soft float).
23139 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23140 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23141 EVT VT = N2.getValueType();
23142 if (!TV || !FV || !TLI.isTypeLegal(VT))
23143 return SDValue();
23145 // If a constant can be materialized without loads, this does not make sense.
23146 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23147 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23148 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23149 return SDValue();
23151 // If both constants have multiple uses, then we won't need to do an extra
23152 // load. The values are likely around in registers for other users.
23153 if (!TV->hasOneUse() && !FV->hasOneUse())
23154 return SDValue();
23156 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23157 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23158 Type *FPTy = Elts[0]->getType();
23159 const DataLayout &TD = DAG.getDataLayout();
23161 // Create a ConstantArray of the two constants.
23162 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23163 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23164 TD.getPrefTypeAlign(FPTy));
23165 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23167 // Get offsets to the 0 and 1 elements of the array, so we can select between
23168 // them.
23169 SDValue Zero = DAG.getIntPtrConstant(0, DL);
23170 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23171 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23172 SDValue Cond =
23173 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23174 AddToWorklist(Cond.getNode());
23175 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23176 AddToWorklist(CstOffset.getNode());
23177 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23178 AddToWorklist(CPIdx.getNode());
23179 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23180 MachinePointerInfo::getConstantPool(
23181 DAG.getMachineFunction()), Alignment);
23184 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23185 /// where 'cond' is the comparison specified by CC.
23186 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23187 SDValue N2, SDValue N3, ISD::CondCode CC,
23188 bool NotExtCompare) {
23189 // (x ? y : y) -> y.
23190 if (N2 == N3) return N2;
23192 EVT CmpOpVT = N0.getValueType();
23193 EVT CmpResVT = getSetCCResultType(CmpOpVT);
23194 EVT VT = N2.getValueType();
23195 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23196 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23197 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23199 // Determine if the condition we're dealing with is constant.
23200 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23201 AddToWorklist(SCC.getNode());
23202 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23203 // fold select_cc true, x, y -> x
23204 // fold select_cc false, x, y -> y
23205 return !(SCCC->isZero()) ? N2 : N3;
23209 if (SDValue V =
23210 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23211 return V;
23213 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23214 return V;
23216 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23217 // where y has a single bit set.
23218 // Put plainly: we can turn the SELECT_CC into an AND
23219 // when the condition can be materialized as an all-ones register. Any
23220 // single bit-test can be materialized as an all-ones register with
23221 // shift-left and shift-right-arith.
23222 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23223 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23224 SDValue AndLHS = N0->getOperand(0);
23225 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23226 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23227 // Shift the tested bit over the sign bit.
23228 const APInt &AndMask = ConstAndRHS->getAPIntValue();
23229 unsigned ShCt = AndMask.getBitWidth() - 1;
23230 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23231 SDValue ShlAmt =
23232 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23233 getShiftAmountTy(AndLHS.getValueType()));
23234 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23236 // Now arithmetic right shift it all the way over, so the result is
23237 // either all-ones, or zero.
23238 SDValue ShrAmt =
23239 DAG.getConstant(ShCt, SDLoc(Shl),
23240 getShiftAmountTy(Shl.getValueType()));
23241 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23243 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23248 // fold select C, 16, 0 -> shl C, 4
23249 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23250 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23252 if ((Fold || Swap) &&
23253 TLI.getBooleanContents(CmpOpVT) ==
23254 TargetLowering::ZeroOrOneBooleanContent &&
23255 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23257 if (Swap) {
23258 CC = ISD::getSetCCInverse(CC, CmpOpVT);
23259 std::swap(N2C, N3C);
23262 // If the caller doesn't want us to simplify this into a zext of a compare,
23263 // don't do it.
23264 if (NotExtCompare && N2C->isOne())
23265 return SDValue();
23267 SDValue Temp, SCC;
23268 // zext (setcc n0, n1)
23269 if (LegalTypes) {
23270 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23271 if (VT.bitsLT(SCC.getValueType()))
23272 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23273 else
23274 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23275 } else {
23276 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23277 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23280 AddToWorklist(SCC.getNode());
23281 AddToWorklist(Temp.getNode());
23283 if (N2C->isOne())
23284 return Temp;
23286 unsigned ShCt = N2C->getAPIntValue().logBase2();
23287 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23288 return SDValue();
23290 // shl setcc result by log2 n2c
23291 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23292 DAG.getConstant(ShCt, SDLoc(Temp),
23293 getShiftAmountTy(Temp.getValueType())));
23296 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23297 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23298 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23299 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23300 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23301 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23302 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23303 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23304 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23305 SDValue ValueOnZero = N2;
23306 SDValue Count = N3;
23308 // If the condition is NE instead of EQ, swap the operands.
23308 if (CC == ISD::SETNE)
23309 std::swap(ValueOnZero, Count);
23310 // Check if the value on zero is a constant equal to the bits in the type.
23311 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23312 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23313 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23314 // legal, combine to just cttz.
23315 if ((Count.getOpcode() == ISD::CTTZ ||
23316 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23317 N0 == Count.getOperand(0) &&
23318 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23319 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23320 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23321 // legal, combine to just ctlz.
23322 if ((Count.getOpcode() == ISD::CTLZ ||
23323 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23324 N0 == Count.getOperand(0) &&
23325 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23326 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23331 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23332 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23333 if (!NotExtCompare && N1C && N2C && N3C &&
23334 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23335 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23336 (N1C->isZero() && CC == ISD::SETLT)) &&
23337 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23338 SDValue ASR = DAG.getNode(
23339 ISD::SRA, DL, CmpOpVT, N0,
23340 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23341 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23342 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23345 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23346 return S;
23348 return SDValue();
23351 /// This is a stub for TargetLowering::SimplifySetCC.
23352 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23353 ISD::CondCode Cond, const SDLoc &DL,
23354 bool foldBooleans) {
23355 TargetLowering::DAGCombinerInfo
23356 DagCombineInfo(DAG, Level, false, this);
23357 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23360 /// Given an ISD::SDIV node expressing a divide by constant, return
23361 /// a DAG expression that will generate the same value by multiplying
23362 /// by a magic number.
23363 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23364 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
23365 // when optimising for minimum size, we don't want to expand a div to a mul
23366 // and a shift.
23367 if (DAG.getMachineFunction().getFunction().hasMinSize())
23368 return SDValue();
23370 SmallVector<SDNode *, 8> Built;
23371 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23372 for (SDNode *N : Built)
23373 AddToWorklist(N);
23374 return S;
23377 return SDValue();
23380 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23381 /// DAG expression that will generate the same value by right shifting.
23382 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23383 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23384 if (!C)
23385 return SDValue();
23387 // Avoid division by zero.
23388 if (C->isZero())
23389 return SDValue();
23391 SmallVector<SDNode *, 8> Built;
23392 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23393 for (SDNode *N : Built)
23394 AddToWorklist(N);
23395 return S;
23398 return SDValue();
23401 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23402 /// expression that will generate the same value by multiplying by a magic
23403 /// number.
23404 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23405 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
23406 // when optimising for minimum size, we don't want to expand a div to a mul
23407 // and a shift.
23408 if (DAG.getMachineFunction().getFunction().hasMinSize())
23409 return SDValue();
23411 SmallVector<SDNode *, 8> Built;
23412 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23413 for (SDNode *N : Built)
23414 AddToWorklist(N);
23415 return S;
23418 return SDValue();
23421 /// Determines the LogBase2 value for a non-null input value using the
23422 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
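/// For example, for an i32 value V == 16: ctlz(16) == 27, and
/// (32 - 1) - 27 == 4 == log2(16).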
23423 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23424 EVT VT = V.getValueType();
23425 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23426 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23427 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23428 return LogBase2;
23431 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23432 /// For the reciprocal, we need to find the zero of the function:
23433 /// F(X) = 1/X - A [which has a zero at X = 1/A]
23434 /// =>
23435 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23436 /// does not require additional intermediate precision]
23437 /// For the last iteration, put numerator N into it to gain more precision:
23438 /// Result = N X_i + X_i (N - N A X_i)
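/// Numerically, for A = 4 and an initial estimate X_0 = 0.3:
///   X_1 = 0.3 * (2 - 4 * 0.3) = 0.24,  X_2 = 0.24 * (2 - 4 * 0.24) = 0.2496,
/// converging quadratically towards 1/A = 0.25.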
23439 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23440 SDNodeFlags Flags) {
23441 if (LegalDAG)
23442 return SDValue();
23444 // TODO: Handle extended types?
23445 EVT VT = Op.getValueType();
23446 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23447 VT.getScalarType() != MVT::f64)
23448 return SDValue();
23450 // If estimates are explicitly disabled for this function, we're done.
23451 MachineFunction &MF = DAG.getMachineFunction();
23452 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23453 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23454 return SDValue();
23456 // Estimates may be explicitly enabled for this type with a custom number of
23457 // refinement steps.
23458 int Iterations = TLI.getDivRefinementSteps(VT, MF);
23459 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23460 AddToWorklist(Est.getNode());
23462 SDLoc DL(Op);
23463 if (Iterations) {
23464 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23466 // Newton iterations: Est = Est + Est (N - Arg * Est)
23467 // If this is the last iteration, also multiply by the numerator.
23468 for (int i = 0; i < Iterations; ++i) {
23469 SDValue MulEst = Est;
23471 if (i == Iterations - 1) {
23472 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23473 AddToWorklist(MulEst.getNode());
23476 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23477 AddToWorklist(NewEst.getNode());
23479 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23480 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23481 AddToWorklist(NewEst.getNode());
23483 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23484 AddToWorklist(NewEst.getNode());
23486 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23487 AddToWorklist(Est.getNode());
23489 } else {
23490 // If no iterations are available, multiply with N.
23491 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23492 AddToWorklist(Est.getNode());
23495 return Est;
23498 return SDValue();
23501 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
23502 /// For the reciprocal sqrt, we need to find the zero of the function:
23503 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23504 /// =>
23505 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23506 /// As a result, we precompute A/2 prior to the iteration loop.
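/// Numerically, for A = 4 and an initial estimate X_0 = 0.6:
///   X_1 = 0.6 * (1.5 - (4 * 0.6 * 0.6) / 2) = 0.468,
/// converging towards 1/sqrt(A) = 0.5.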
23507 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23508 unsigned Iterations,
23509 SDNodeFlags Flags, bool Reciprocal) {
23510 EVT VT = Arg.getValueType();
23511 SDLoc DL(Arg);
23512 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23514 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23515 // this entire sequence requires only one FP constant.
23516 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23517 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23519 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23520 for (unsigned i = 0; i < Iterations; ++i) {
23521 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23522 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23523 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23524 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23527 // If non-reciprocal square root is requested, multiply the result by Arg.
23528 if (!Reciprocal)
23529 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23531 return Est;
23532 }
23534 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23535 /// For the reciprocal sqrt, we need to find the zero of the function:
23536 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23537 /// =>
23538 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
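// This is algebraically the same update as the one-constant form above:
//   (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - 0.5 * A * X_i^2).
// It is merely re-associated so that both constants (-0.5 and -3.0) are plain
// operands and the A * X_i product is exposed for reuse below.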
23539 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23540 unsigned Iterations,
23541 SDNodeFlags Flags, bool Reciprocal) {
23542 EVT VT = Arg.getValueType();
23543 SDLoc DL(Arg);
23544 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23545 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23547 // This routine must enter the loop below to work correctly
23548 // when (Reciprocal == false).
23549 assert(Iterations > 0);
23551 // Newton iterations for reciprocal square root:
23552 // E = (E * -0.5) * ((A * E) * E + -3.0)
23553 for (unsigned i = 0; i < Iterations; ++i) {
23554 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23555 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23556 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23558 // When calculating a square root, the last iteration builds:
23559 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23560 // (note the common subexpression A * E)
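// This follows from sqrt(A) = A * rsqrt(A): multiplying the RSQRT update
// (E * -0.5) * ((A * E) * E + -3.0) by A is the same as replacing the leading
// E with (A * E), so the A * E product already computed for the right-hand
// side is reused on the left.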
23561 SDValue LHS;
23562 if (Reciprocal || (i + 1) < Iterations) {
23563 // RSQRT: LHS = (E * -0.5)
23564 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23565 } else {
23566 // SQRT: LHS = (A * E) * -0.5
23567 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23568 }
23570 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23571 }
23573 return Est;
23574 }
23576 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23577 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23578 /// Op can be zero.
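// Concretely: an estimate of rsqrt(0.0) is +Inf, so Op * rsqrt(Op) evaluates
// to 0.0 * Inf = NaN instead of the expected sqrt(0.0) = 0.0, and denormal
// inputs can be similarly far off; the select built at the end of this
// function substitutes the target-provided result for those inputs.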
23579 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23580 bool Reciprocal) {
23581 if (LegalDAG)
23582 return SDValue();
23584 // TODO: Handle extended types?
23585 EVT VT = Op.getValueType();
23586 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23587 VT.getScalarType() != MVT::f64)
23588 return SDValue();
23590 // If estimates are explicitly disabled for this function, we're done.
23591 MachineFunction &MF = DAG.getMachineFunction();
23592 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23593 if (Enabled == TLI.ReciprocalEstimate::Disabled)
23594 return SDValue();
23596 // Estimates may be explicitly enabled for this type with a custom number of
23597 // refinement steps.
23598 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23600 bool UseOneConstNR = false;
23601 if (SDValue Est =
23602 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23603 Reciprocal)) {
23604 AddToWorklist(Est.getNode());
23606 if (Iterations)
23607 Est = UseOneConstNR
23608 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23609 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23610 if (!Reciprocal) {
23611 SDLoc DL(Op);
23612 // Try the target specific test first.
23613 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23615 // The estimate is now completely wrong if the input was exactly 0.0 or
23616 // possibly a denormal. Force the answer to 0.0 or the value provided by
23617 // the target for those cases.
23618 Est = DAG.getNode(
23619 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23620 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23621 }
23622 return Est;
23623 }
23625 return SDValue();
23626 }
23628 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23629 return buildSqrtEstimateImpl(Op, Flags, true);
23630 }
23632 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23633 return buildSqrtEstimateImpl(Op, Flags, false);
23634 }
23636 /// Return true if there is any possibility that the two addresses overlap.
23637 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
23639 struct MemUseCharacteristics {
23640 bool IsVolatile;
23641 bool IsAtomic;
23642 SDValue BasePtr;
23643 int64_t Offset;
23644 Optional<int64_t> NumBytes;
23645 MachineMemOperand *MMO;
23646 };
23648 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23649 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23650 int64_t Offset = 0;
23651 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23652 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23653 ? C->getSExtValue()
23654 : (LSN->getAddressingMode() == ISD::PRE_DEC)
23655 ? -1 * C->getSExtValue()
23656 : 0;
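// For pre-indexed nodes the access itself happens at BasePtr +/- the constant
// increment, so fold it into Offset here; post-indexed and unindexed nodes
// access BasePtr directly, so their offset stays 0.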
23657 uint64_t Size =
23658 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23659 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23660 Offset /*base offset*/,
23661 Optional<int64_t>(Size),
23662 LSN->getMemOperand()};
23663 }
23664 if (const auto *LN = cast<LifetimeSDNode>(N))
23665 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23666 (LN->hasOffset()) ? LN->getOffset() : 0,
23667 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23668 : Optional<int64_t>(),
23669 (MachineMemOperand *)nullptr};
23670 // Default.
23671 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23672 (int64_t)0 /*offset*/,
23673 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23674 };
23676 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23677 MUC1 = getCharacteristics(Op1);
23679 // If they are to the same address, then they must be aliases.
23680 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23681 MUC0.Offset == MUC1.Offset)
23682 return true;
23684 // If they are both volatile then they cannot be reordered.
23685 if (MUC0.IsVolatile && MUC1.IsVolatile)
23686 return true;
23688 // Be conservative about atomics for the moment
23689 // TODO: This is way overconservative for unordered atomics (see D66309)
23690 if (MUC0.IsAtomic && MUC1.IsAtomic)
23691 return true;
23693 if (MUC0.MMO && MUC1.MMO) {
23694 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23695 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23696 return false;
23697 }
23699 // Try to prove that there is aliasing, or that there is no aliasing. Either
23700 // way, we can return now. If nothing can be proved, proceed with more tests.
23701 bool IsAlias;
23702 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23703 DAG, IsAlias))
23704 return IsAlias;
23706 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23707 // either are not known.
23708 if (!MUC0.MMO || !MUC1.MMO)
23709 return true;
23711 // If one operation reads from invariant memory and the other may store, they
23712 // cannot alias. These should really be checking the equivalent of mayWrite,
23713 // but it only matters for memory nodes other than load/store.
23714 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23715 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23716 return false;
23718 // If we know that SrcValue1 and SrcValue2 have relatively large alignment
23719 // compared to the size and offset of the access, we may be able
23720 // to prove they do not alias. This check is conservative for now to catch
23721 // cases created by splitting vector types; it only works when the offsets are
23722 // multiples of the size of the data.
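// Illustrative case this catches: two 4-byte accesses on the same 8-byte
// aligned base, at offsets 0 and 4. Within the 8-byte alignment window they
// occupy [0, 4) and [4, 8), which do not overlap, so they cannot alias.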
23723 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23724 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23725 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23726 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23727 auto &Size0 = MUC0.NumBytes;
23728 auto &Size1 = MUC1.NumBytes;
23729 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23730 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23731 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23732 SrcValOffset1 % *Size1 == 0) {
23733 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23734 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23736 // There is no overlap between these relatively aligned accesses of
23737 // similar size. Return no alias.
23738 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23739 return false;
23740 }
23742 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23743 ? CombinerGlobalAA
23744 : DAG.getSubtarget().useAA();
23745 #ifndef NDEBUG
23746 if (CombinerAAOnlyFunc.getNumOccurrences() &&
23747 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23748 UseAA = false;
23749 #endif
23751 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23752 Size0.hasValue() && Size1.hasValue()) {
23753 // Use alias analysis information.
23754 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23755 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23756 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
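// The sizes passed to AA are deliberately conservative: each one spans from
// the smaller of the two source-value offsets up to that access's own end,
// so a NoAlias answer also holds for the original, smaller accesses.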
23757 if (AA->isNoAlias(
23758 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23759 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23760 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23761 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23762 return false;
23763 }
23765 // Otherwise we have to assume they alias.
23766 return true;
23767 }
23769 /// Walk up chain skipping non-aliasing memory nodes,
23770 /// looking for aliasing nodes and adding them to the Aliases vector.
23771 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23772 SmallVectorImpl<SDValue> &Aliases) {
23773 SmallVector<SDValue, 8> Chains; // List of chains to visit.
23774 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
23776 // Get alias information for node.
23777 // TODO: relax aliasing for unordered atomics (see D66309)
23778 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23780 // Starting off.
23781 Chains.push_back(OriginalChain);
23782 unsigned Depth = 0;
23784 // Attempt to improve chain by a single step
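// On success the lambda returns true and rewrites C to the next chain to
// follow (or to a null SDValue once the entry token is reached); returning
// false means C could not be stepped past and the caller records it as an
// alias.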
23785 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23786 switch (C.getOpcode()) {
23787 case ISD::EntryToken:
23788 // No need to mark EntryToken.
23789 C = SDValue();
23790 return true;
23791 case ISD::LOAD:
23792 case ISD::STORE: {
23793 // Get alias information for C.
23794 // TODO: Relax aliasing for unordered atomics (see D66309)
23795 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23796 cast<LSBaseSDNode>(C.getNode())->isSimple();
23797 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
23798 // Look further up the chain.
23799 C = C.getOperand(0);
23800 return true;
23801 }
23802 // Alias, so stop here.
23803 return false;
23804 }
23806 case ISD::CopyFromReg:
23807 // Always forward past CopyFromReg.
23808 C = C.getOperand(0);
23809 return true;
23811 case ISD::LIFETIME_START:
23812 case ISD::LIFETIME_END: {
23813 // We can forward past any lifetime start/end that can be proven not to
23814 // alias the memory access.
23815 if (!mayAlias(N, C.getNode())) {
23816 // Look further up the chain.
23817 C = C.getOperand(0);
23818 return true;
23819 }
23820 return false;
23821 }
23822 default:
23823 return false;
23824 }
23825 };
23827 // Look at each chain and determine if it is an alias. If so, add it to the
23828 // aliases list. If not, then continue up the chain looking for the next
23829 // candidate.
23830 while (!Chains.empty()) {
23831 SDValue Chain = Chains.pop_back_val();
23833 // Don't bother if we've seen Chain before.
23834 if (!Visited.insert(Chain.getNode()).second)
23835 continue;
23837 // For TokenFactor nodes, look at each operand and only continue up the
23838 // chain until we reach the depth limit.
23840 // FIXME: The depth check could be made to return the last non-aliasing
23841 // chain we found before we hit a tokenfactor rather than the original
23842 // chain.
23843 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23844 Aliases.clear();
23845 Aliases.push_back(OriginalChain);
23846 return;
23847 }
23849 if (Chain.getOpcode() == ISD::TokenFactor) {
23850 // We have to check each of the operands of the token factor for "small"
23851 // token factors, so we queue them up. Adding the operands to the queue
23852 // (stack) in reverse order maintains the original order and increases the
23853 // likelihood that getNode will find a matching token factor (CSE).
23854 if (Chain.getNumOperands() > 16) {
23855 Aliases.push_back(Chain);
23856 continue;
23857 }
23858 for (unsigned n = Chain.getNumOperands(); n;)
23859 Chains.push_back(Chain.getOperand(--n));
23860 ++Depth;
23861 continue;
23862 }
23863 // Everything else
23864 if (ImproveChain(Chain)) {
23865 // An improved chain was found; consider the new chain if one exists.
23866 if (Chain.getNode())
23867 Chains.push_back(Chain);
23868 ++Depth;
23869 continue;
23870 }
23871 // No improved chain is possible, so treat it as an alias.
23872 Aliases.push_back(Chain);
23873 }
23874 }
23876 /// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
23877 /// chain (aliasing node).
23878 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23879 if (OptLevel == CodeGenOpt::None)
23880 return OldChain;
23882 // Ops for replacing token factor.
23883 SmallVector<SDValue, 8> Aliases;
23885 // Accumulate all the aliases to this node.
23886 GatherAllAliases(N, OldChain, Aliases);
23888 // If no operands then chain to entry token.
23889 if (Aliases.size() == 0)
23890 return DAG.getEntryNode();
23892 // If a single operand then chain to it. We don't need to revisit it.
23893 if (Aliases.size() == 1)
23894 return Aliases[0];
23896 // Construct a custom tailored token factor.
23897 return DAG.getTokenFactor(SDLoc(N), Aliases);
23898 }
23900 namespace {
23901 // TODO: Replace with std::monostate when we move to C++17.
23902 struct UnitT { } Unit;
23903 bool operator==(const UnitT &, const UnitT &) { return true; }
23904 bool operator!=(const UnitT &, const UnitT &) { return false; }
23905 } // namespace
23907 // This function tries to collect a bunch of potentially interesting
23908 // nodes to improve the chains of, all at once. This might seem
23909 // redundant, as this function gets called when visiting every store
23910 // node, so why not let the work be done on each store as it's visited?
23912 // I believe this is mainly important because mergeConsecutiveStores
23913 // is unable to deal with merging stores of different sizes, so unless
23914 // we improve the chains of all the potential candidates up-front
23915 // before running mergeConsecutiveStores, it might only see some of
23916 // the nodes that will eventually be candidates, and then not be able
23917 // to go from a partially-merged state to the desired final
23918 // fully-merged state.
23920 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23921 SmallVector<StoreSDNode *, 8> ChainedStores;
23922 StoreSDNode *STChain = St;
23923 // Intervals records which offsets from BaseIndex have been covered. In
23924 // the common case, every store writes to the immediately preceding address,
23925 // and is thus merged with the previous interval at insertion time.
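// Illustrative shape: with half-open intervals, a 4-byte store at offset 0
// occupies [0, 4); a later 4-byte store at offset 4 inserts [4, 8), which
// coalesces with it, so a gap-free run of stores is represented by a single
// interval.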
23927 using IMap =
23928 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23929 IMap::Allocator A;
23930 IMap Intervals(A);
23932 // This holds the base pointer, index, and the offset in bytes from the base
23933 // pointer.
23934 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23936 // We must have a base and an offset.
23937 if (!BasePtr.getBase().getNode())
23938 return false;
23940 // Do not handle stores to undef base pointers.
23941 if (BasePtr.getBase().isUndef())
23942 return false;
23944 // Do not handle stores to opaque types
23945 if (St->getMemoryVT().isZeroSized())
23946 return false;
23948 // BaseIndexOffset assumes that offsets are fixed-size, which
23949 // is not valid for scalable vectors where the offsets are
23950 // scaled by `vscale`, so bail out early.
23951 if (St->getMemoryVT().isScalableVector())
23952 return false;
23954 // Add ST's interval.
23955 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23957 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23958 if (Chain->getMemoryVT().isScalableVector())
23959 return false;
23961 // If the chain has more than one use, then we can't reorder the mem ops.
23962 if (!SDValue(Chain, 0)->hasOneUse())
23963 break;
23964 // TODO: Relax for unordered atomics (see D66309)
23965 if (!Chain->isSimple() || Chain->isIndexed())
23966 break;
23968 // Find the base pointer and offset for this memory node.
23969 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23970 // Check that the base pointer is the same as the original one.
23971 int64_t Offset;
23972 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23973 break;
23974 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23975 // Make sure we don't overlap with other intervals by checking the ones to
23976 // the left or right before inserting.
23977 auto I = Intervals.find(Offset);
23978 // If there's a next interval, we should end before it.
23979 if (I != Intervals.end() && I.start() < (Offset + Length))
23980 break;
23981 // If there's a previous interval, we should start after it.
23982 if (I != Intervals.begin() && (--I).stop() <= Offset)
23983 break;
23984 Intervals.insert(Offset, Offset + Length, Unit);
23986 ChainedStores.push_back(Chain);
23987 STChain = Chain;
23988 }
23990 // If we didn't find a chained store, exit.
23991 if (ChainedStores.size() == 0)
23992 return false;
23994 // Improve all chained stores (St and ChainedStores members) starting from
23995 // where the store chain ended and return single TokenFactor.
23996 SDValue NewChain = STChain->getChain();
23997 SmallVector<SDValue, 8> TFOps;
23998 for (unsigned I = ChainedStores.size(); I;) {
23999 StoreSDNode *S = ChainedStores[--I];
24000 SDValue BetterChain = FindBetterChain(S, NewChain);
24001 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24002 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24003 TFOps.push_back(SDValue(S, 0));
24004 ChainedStores[I] = S;
24005 }
24007 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24008 SDValue BetterChain = FindBetterChain(St, NewChain);
24009 SDValue NewST;
24010 if (St->isTruncatingStore())
24011 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24012 St->getBasePtr(), St->getMemoryVT(),
24013 St->getMemOperand());
24014 else
24015 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24016 St->getBasePtr(), St->getMemOperand());
24018 TFOps.push_back(NewST);
24020 // If we improved every element of TFOps, then we've lost the dependence on
24021 // NewChain to successors of St and we need to add it back to TFOps. Do so at
24022 // the beginning to keep relative order consistent with FindBetterChains.
24023 auto hasImprovedChain = [&](SDValue ST) -> bool {
24024 return ST->getOperand(0) != NewChain;
24025 };
24026 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24027 if (AddNewChain)
24028 TFOps.insert(TFOps.begin(), NewChain);
24030 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24031 CombineTo(St, TF);
24033 // Add TF and its operands to the worklist.
24034 AddToWorklist(TF.getNode());
24035 for (const SDValue &Op : TF->ops())
24036 AddToWorklist(Op.getNode());
24037 AddToWorklist(STChain);
24038 return true;
24039 }
24041 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24042 if (OptLevel == CodeGenOpt::None)
24043 return false;
24045 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24047 // We must have a base and an offset.
24048 if (!BasePtr.getBase().getNode())
24049 return false;
24051 // Do not handle stores to undef base pointers.
24052 if (BasePtr.getBase().isUndef())
24053 return false;
24055 // Directly improve a chain of disjoint stores starting at St.
24056 if (parallelizeChainedStores(St))
24057 return true;
24059 // Improve St's chain.
24060 SDValue BetterChain = FindBetterChain(St, St->getChain());
24061 if (St->getChain() != BetterChain) {
24062 replaceStoreChain(St, BetterChain);
24063 return true;
24064 }
24065 return false;
24066 }
24068 /// This is the entry point for the file.
24069 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
24070 CodeGenOpt::Level OptLevel) {
24071 /// This is the main entry point to this class.
24072 DAGCombiner(*this, AA, OptLevel).Run(Level);