llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/Analysis/TargetLibraryInfo.h"
  35 #include "llvm/Analysis/VectorUtils.h"
  36 #include "llvm/CodeGen/DAGCombine.h"
  37 #include "llvm/CodeGen/ISDOpcodes.h"
  38 #include "llvm/CodeGen/MachineFrameInfo.h"
  39 #include "llvm/CodeGen/MachineFunction.h"
  40 #include "llvm/CodeGen/MachineMemOperand.h"
  41 #include "llvm/CodeGen/RuntimeLibcalls.h"
  42 #include "llvm/CodeGen/SelectionDAG.h"
  43 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  44 #include "llvm/CodeGen/SelectionDAGNodes.h"
  45 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  46 #include "llvm/CodeGen/TargetLowering.h"
  47 #include "llvm/CodeGen/TargetRegisterInfo.h"
  48 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  49 #include "llvm/CodeGen/ValueTypes.h"
  50 #include "llvm/IR/Attributes.h"
  51 #include "llvm/IR/Constant.h"
  52 #include "llvm/IR/DataLayout.h"
  53 #include "llvm/IR/DerivedTypes.h"
  54 #include "llvm/IR/Function.h"
  55 #include "llvm/IR/LLVMContext.h"
  56 #include "llvm/IR/Metadata.h"
  57 #include "llvm/Support/Casting.h"
  58 #include "llvm/Support/CodeGen.h"
  59 #include "llvm/Support/CommandLine.h"
  60 #include "llvm/Support/Compiler.h"
  61 #include "llvm/Support/Debug.h"
  62 #include "llvm/Support/ErrorHandling.h"
  63 #include "llvm/Support/KnownBits.h"
  64 #include "llvm/Support/MachineValueType.h"
  65 #include "llvm/Support/MathExtras.h"
  66 #include "llvm/Support/raw_ostream.h"
  67 #include "llvm/Target/TargetMachine.h"
  68 #include "llvm/Target/TargetOptions.h"
  69 #include <algorithm>
  70 #include <cassert>
  71 #include <cstdint>
  72 #include <functional>
  73 #include <iterator>
  74 #include <string>
  75 #include <tuple>
  76 #include <utility>
  77
  78 using namespace llvm;
  79
  80 #define DEBUG_TYPE "dagcombine"
  81
  82 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  85 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  86 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  87 STATISTIC(SlicedLoads, "Number of load sliced");
  88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  89
  90 static cl::opt<bool>
  91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  92                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  93
  94 static cl::opt<bool>
  95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  96         cl::desc("Enable DAG combiner's use of TBAA"));
  97
  98 #ifndef NDEBUG
  99 static cl::opt<std::string>
 100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
 101                    cl::desc("Only use DAG-combiner alias analysis in this"
 102                             " function"));
 103 #endif
 104
 105 /// Hidden option to stress test load slicing, i.e., when this option
 106 /// is enabled, load slicing bypasses most of its profitability guards.
 107 static cl::opt<bool>
 108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 109                   cl::desc("Bypass the profitability model of load slicing"),
 110                   cl::init(false));
 111
 112 static cl::opt<bool>
 113   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 114                     cl::desc("DAG combiner may split indexing from loads"));
 115
 116 static cl::opt<bool>
 117     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
 118                        cl::desc("DAG combiner enable merging multiple stores "
 119                                 "into a wider store"));
 120
 121 static cl::opt<unsigned> TokenFactorInlineLimit(
 122     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 123     cl::desc("Limit the number of operands to inline for Token Factors"));
 124
 125 static cl::opt<unsigned> StoreMergeDependenceLimit(
 126     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
 127     cl::desc("Limit the number of times for the same StoreNode and RootNode "
 128              "to bail out in store merging dependence check"));
 129
 130 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
 131     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
 132     cl::desc("DAG cominber enable reducing the width of load/op/store "
 133              "sequence"));
 134
 135 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
 136     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
 137     cl::desc("DAG cominber enable load/<replace bytes>/store with "
 138              "a narrower store"));
 139
 140 namespace {
 141
 142   class DAGCombiner {
 143     SelectionDAG &DAG;
 144     const TargetLowering &TLI;
 145     const SelectionDAGTargetInfo *STI;
 146     CombineLevel Level;
 147     CodeGenOpt::Level OptLevel;
 148     bool LegalDAG = false;
 149     bool LegalOperations = false;
 150     bool LegalTypes = false;
 151     bool ForCodeSize;
 152     bool DisableGenericCombines;
 153
 154     /// Worklist of all of the nodes that need to be simplified.
 155     ///
 156     /// This must behave as a stack -- new nodes to process are pushed onto the
 157     /// back and when processing we pop off of the back.
 158     ///
 159     /// The worklist will not contain duplicates but may contain null entries
 160     /// due to nodes being deleted from the underlying DAG.
 161     SmallVector<SDNode *, 64> Worklist;
 162
 163     /// Mapping from an SDNode to its position on the worklist.
 164     ///
 165     /// This is used to find and remove nodes from the worklist (by nulling
 166     /// them) when they are deleted from the underlying DAG. It relies on
 167     /// stable indices of nodes within the worklist.
 168     DenseMap<SDNode *, unsigned> WorklistMap;
  169     /// This records all nodes we attempted to add to the worklist since we
  170     /// considered a new worklist entry. As we do not add duplicate nodes
  171     /// to the worklist, this is different from the tail of the worklist.
 172     SmallSetVector<SDNode *, 32> PruningList;
 173
 174     /// Set of nodes which have been combined (at least once).
 175     ///
 176     /// This is used to allow us to reliably add any operands of a DAG node
 177     /// which have not yet been combined to the worklist.
 178     SmallPtrSet<SDNode *, 32> CombinedNodes;
 179
 180     /// Map from candidate StoreNode to the pair of RootNode and count.
 181     /// The count is used to track how many times we have seen the StoreNode
 182     /// with the same RootNode bail out in dependence check. If we have seen
 183     /// the bail out for the same pair many times over a limit, we won't
 184     /// consider the StoreNode with the same RootNode as store merging
 185     /// candidate again.
 186     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
 187
 188     // AA - Used for DAG load/store alias analysis.
 189     AliasAnalysis *AA;
 190
 191     /// When an instruction is simplified, add all users of the instruction to
 192     /// the work lists because they might get more simplified now.
 193     void AddUsersToWorklist(SDNode *N) {
 194       for (SDNode *Node : N->uses())
 195         AddToWorklist(Node);
 196     }
 197
  198     /// Convenient shorthand to add a node and all of its users to the worklist.
  199     void AddToWorklistWithUsers(SDNode *N) {
  200       AddUsersToWorklist(N); // Queue the users before N itself.
  201       AddToWorklist(N);
  202     }
 203
 204     // Prune potentially dangling nodes. This is called after
 205     // any visit to a node, but should also be called during a visit after any
 206     // failed combine which may have created a DAG node.
 207     void clearAddedDanglingWorklistEntries() {
 208       // Check any nodes added to the worklist to see if they are prunable.
 209       while (!PruningList.empty()) {
 210         auto *N = PruningList.pop_back_val();
 211         if (N->use_empty())
 212           recursivelyDeleteUnusedNodes(N);
 213       }
 214     }
 215
 216     SDNode *getNextWorklistEntry() {
 217       // Before we do any work, remove nodes that are not in use.
 218       clearAddedDanglingWorklistEntries();
 219       SDNode *N = nullptr;
 220       // The Worklist holds the SDNodes in order, but it may contain null
 221       // entries.
 222       while (!N && !Worklist.empty()) {
 223         N = Worklist.pop_back_val();
 224       }
 225
 226       if (N) {
 227         bool GoodWorklistEntry = WorklistMap.erase(N);
 228         (void)GoodWorklistEntry;
 229         assert(GoodWorklistEntry &&
 230                "Found a worklist entry without a corresponding map entry!");
 231       }
 232       return N;
 233     }
 234
 235     /// Call the node-specific routine that folds each particular type of node.
 236     SDValue visit(SDNode *N);
 237
 238   public:
 239     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 240         : DAG(D), TLI(D.getTargetLoweringInfo()),
 241           STI(D.getSubtarget().getSelectionDAGInfo()),
 242           Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
 243       ForCodeSize = DAG.shouldOptForSize();
 244       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
 245
 246       MaximumLegalStoreInBits = 0;
 247       // We use the minimum store size here, since that's all we can guarantee
 248       // for the scalable vector types.
 249       for (MVT VT : MVT::all_valuetypes())
 250         if (EVT(VT).isSimple() && VT != MVT::Other &&
 251             TLI.isTypeLegal(EVT(VT)) &&
 252             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
 253           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
 254     }
 255
  256     void ConsiderForPruning(SDNode *N) {
  257       // Record N; clearAddedDanglingWorklistEntries() deletes it if unused.
  258       PruningList.insert(N);
  259     }
 260
 261     /// Add to the worklist making sure its instance is at the back (next to be
 262     /// processed.)
 263     void AddToWorklist(SDNode *N) {
 264       assert(N->getOpcode() != ISD::DELETED_NODE &&
 265              "Deleted Node added to Worklist");
 266
 267       // Skip handle nodes as they can't usefully be combined and confuse the
 268       // zero-use deletion strategy.
 269       if (N->getOpcode() == ISD::HANDLENODE)
 270         return;
 271
 272       ConsiderForPruning(N);
 273
 274       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 275         Worklist.push_back(N);
 276     }
 277
 278     /// Remove all instances of N from the worklist.
 279     void removeFromWorklist(SDNode *N) {
 280       CombinedNodes.erase(N);
 281       PruningList.remove(N);
 282       StoreRootCountMap.erase(N);
 283
 284       auto It = WorklistMap.find(N);
 285       if (It == WorklistMap.end())
 286         return; // Not in the worklist.
 287
 288       // Null out the entry rather than erasing it to avoid a linear operation.
 289       Worklist[It->second] = nullptr;
 290       WorklistMap.erase(It);
 291     }
 292
 293     void deleteAndRecombine(SDNode *N);
 294     bool recursivelyDeleteUnusedNodes(SDNode *N);
 295
 296     /// Replaces all uses of the results of one DAG node with new values.
 297     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 298                       bool AddTo = true);
 299
  300     /// Replaces all uses of the results of one DAG node with new values.
  301     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
  302       return CombineTo(N, &Res, 1, AddTo); // Forward as a one-element array.
  303     }
 304
  305     /// Replaces all uses of the results of one DAG node with new values.
  306     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
  307                       bool AddTo = true) {
  308       SDValue To[] = { Res0, Res1 }; // Pack both values for the array overload.
  309       return CombineTo(N, To, 2, AddTo);
  310     }
 311
 312     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 313
 314   private:
 315     unsigned MaximumLegalStoreInBits;
 316
 317     /// Check the specified integer node value to see if it can be simplified or
 318     /// if things it uses can be simplified by bit propagation.
 319     /// If so, return true.
 320     bool SimplifyDemandedBits(SDValue Op) {
 321       unsigned BitWidth = Op.getScalarValueSizeInBits();
 322       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
 323       return SimplifyDemandedBits(Op, DemandedBits);
 324     }
 325
 326     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 327       TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
 328       KnownBits Known;
 329       if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
 330         return false;
 331
 332       // Revisit the node.
 333       AddToWorklist(Op.getNode());
 334
 335       CommitTargetLoweringOpt(TLO);
 336       return true;
 337     }
 338
 339     /// Check the specified vector node value to see if it can be simplified or
 340     /// if things it uses can be simplified as it only uses some of the
 341     /// elements. If so, return true.
 342     bool SimplifyDemandedVectorElts(SDValue Op) {
 343       // TODO: For now just pretend it cannot be simplified.
 344       if (Op.getValueType().isScalableVector())
 345         return false;
 346
 347       unsigned NumElts = Op.getValueType().getVectorNumElements();
 348       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 349       return SimplifyDemandedVectorElts(Op, DemandedElts);
 350     }
 351
 352     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 353                               const APInt &DemandedElts,
 354                               bool AssumeSingleUse = false);
 355     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 356                                     bool AssumeSingleUse = false);
 357
 358     bool CombineToPreIndexedLoadStore(SDNode *N);
 359     bool CombineToPostIndexedLoadStore(SDNode *N);
 360     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 361     bool SliceUpLoad(SDNode *N);
 362
 363     // Scalars have size 0 to distinguish from singleton vectors.
 364     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 365     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 366     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 367
 368     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 369     ///   load.
 370     ///
 371     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 372     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 373     /// \param EltNo index of the vector element to load.
 374     /// \param OriginalLoad load that EVE came from to be replaced.
  375     /// \returns EVE on success, SDValue() on failure.
 376     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 377                                          SDValue EltNo,
 378                                          LoadSDNode *OriginalLoad);
 379     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 380     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 381     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 382     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 383     SDValue PromoteIntBinOp(SDValue Op);
 384     SDValue PromoteIntShiftOp(SDValue Op);
 385     SDValue PromoteExtend(SDValue Op);
 386     bool PromoteLoad(SDValue Op);
 387
 388     /// Call the node-specific routine that knows how to fold each
 389     /// particular type of node. If that doesn't do anything, try the
 390     /// target-specific DAG combines.
 391     SDValue combine(SDNode *N);
 392
 393     // Visitation implementation - Implement dag node combining for different
 394     // node types.  The semantics are as follows:
 395     // Return Value:
 396     //   SDValue.getNode() == 0 - No change was made
 397     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 398     //   otherwise              - N should be replaced by the returned Operand.
 399     //
 400     SDValue visitTokenFactor(SDNode *N);
 401     SDValue visitMERGE_VALUES(SDNode *N);
 402     SDValue visitADD(SDNode *N);
 403     SDValue visitADDLike(SDNode *N);
 404     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 405     SDValue visitSUB(SDNode *N);
 406     SDValue visitADDSAT(SDNode *N);
 407     SDValue visitSUBSAT(SDNode *N);
 408     SDValue visitADDC(SDNode *N);
 409     SDValue visitADDO(SDNode *N);
 410     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 411     SDValue visitSUBC(SDNode *N);
 412     SDValue visitSUBO(SDNode *N);
 413     SDValue visitADDE(SDNode *N);
 414     SDValue visitADDCARRY(SDNode *N);
 415     SDValue visitSADDO_CARRY(SDNode *N);
 416     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 417     SDValue visitSUBE(SDNode *N);
 418     SDValue visitSUBCARRY(SDNode *N);
 419     SDValue visitSSUBO_CARRY(SDNode *N);
 420     SDValue visitMUL(SDNode *N);
 421     SDValue visitMULFIX(SDNode *N);
 422     SDValue useDivRem(SDNode *N);
 423     SDValue visitSDIV(SDNode *N);
 424     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 425     SDValue visitUDIV(SDNode *N);
 426     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 427     SDValue visitREM(SDNode *N);
 428     SDValue visitMULHU(SDNode *N);
 429     SDValue visitMULHS(SDNode *N);
 430     SDValue visitSMUL_LOHI(SDNode *N);
 431     SDValue visitUMUL_LOHI(SDNode *N);
 432     SDValue visitMULO(SDNode *N);
 433     SDValue visitIMINMAX(SDNode *N);
 434     SDValue visitAND(SDNode *N);
 435     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 436     SDValue visitOR(SDNode *N);
 437     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 438     SDValue visitXOR(SDNode *N);
 439     SDValue SimplifyVBinOp(SDNode *N);
 440     SDValue visitSHL(SDNode *N);
 441     SDValue visitSRA(SDNode *N);
 442     SDValue visitSRL(SDNode *N);
 443     SDValue visitFunnelShift(SDNode *N);
 444     SDValue visitRotate(SDNode *N);
 445     SDValue visitABS(SDNode *N);
 446     SDValue visitBSWAP(SDNode *N);
 447     SDValue visitBITREVERSE(SDNode *N);
 448     SDValue visitCTLZ(SDNode *N);
 449     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 450     SDValue visitCTTZ(SDNode *N);
 451     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 452     SDValue visitCTPOP(SDNode *N);
 453     SDValue visitSELECT(SDNode *N);
 454     SDValue visitVSELECT(SDNode *N);
 455     SDValue visitSELECT_CC(SDNode *N);
 456     SDValue visitSETCC(SDNode *N);
 457     SDValue visitSETCCCARRY(SDNode *N);
 458     SDValue visitSIGN_EXTEND(SDNode *N);
 459     SDValue visitZERO_EXTEND(SDNode *N);
 460     SDValue visitANY_EXTEND(SDNode *N);
 461     SDValue visitAssertExt(SDNode *N);
 462     SDValue visitAssertAlign(SDNode *N);
 463     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 464     SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
 465     SDValue visitTRUNCATE(SDNode *N);
 466     SDValue visitBITCAST(SDNode *N);
 467     SDValue visitFREEZE(SDNode *N);
 468     SDValue visitBUILD_PAIR(SDNode *N);
 469     SDValue visitFADD(SDNode *N);
 470     SDValue visitSTRICT_FADD(SDNode *N);
 471     SDValue visitFSUB(SDNode *N);
 472     SDValue visitFMUL(SDNode *N);
 473     SDValue visitFMA(SDNode *N);
 474     SDValue visitFDIV(SDNode *N);
 475     SDValue visitFREM(SDNode *N);
 476     SDValue visitFSQRT(SDNode *N);
 477     SDValue visitFCOPYSIGN(SDNode *N);
 478     SDValue visitFPOW(SDNode *N);
 479     SDValue visitSINT_TO_FP(SDNode *N);
 480     SDValue visitUINT_TO_FP(SDNode *N);
 481     SDValue visitFP_TO_SINT(SDNode *N);
 482     SDValue visitFP_TO_UINT(SDNode *N);
 483     SDValue visitFP_ROUND(SDNode *N);
 484     SDValue visitFP_EXTEND(SDNode *N);
 485     SDValue visitFNEG(SDNode *N);
 486     SDValue visitFABS(SDNode *N);
 487     SDValue visitFCEIL(SDNode *N);
 488     SDValue visitFTRUNC(SDNode *N);
 489     SDValue visitFFLOOR(SDNode *N);
 490     SDValue visitFMINNUM(SDNode *N);
 491     SDValue visitFMAXNUM(SDNode *N);
 492     SDValue visitFMINIMUM(SDNode *N);
 493     SDValue visitFMAXIMUM(SDNode *N);
 494     SDValue visitBRCOND(SDNode *N);
 495     SDValue visitBR_CC(SDNode *N);
 496     SDValue visitLOAD(SDNode *N);
 497
 498     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 499     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 500
 501     SDValue visitSTORE(SDNode *N);
 502     SDValue visitLIFETIME_END(SDNode *N);
 503     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 504     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 505     SDValue visitBUILD_VECTOR(SDNode *N);
 506     SDValue visitCONCAT_VECTORS(SDNode *N);
 507     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 508     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 509     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 510     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 511     SDValue visitMLOAD(SDNode *N);
 512     SDValue visitMSTORE(SDNode *N);
 513     SDValue visitMGATHER(SDNode *N);
 514     SDValue visitMSCATTER(SDNode *N);
 515     SDValue visitFP_TO_FP16(SDNode *N);
 516     SDValue visitFP16_TO_FP(SDNode *N);
 517     SDValue visitVECREDUCE(SDNode *N);
 518
 519     SDValue visitFADDForFMACombine(SDNode *N);
 520     SDValue visitFSUBForFMACombine(SDNode *N);
 521     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 522
 523     SDValue XformToShuffleWithZero(SDNode *N);
 524     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 525                                                     const SDLoc &DL, SDValue N0,
 526                                                     SDValue N1);
 527     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 528                                       SDValue N1);
 529     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 530                            SDValue N1, SDNodeFlags Flags);
 531
 532     SDValue visitShiftByConstant(SDNode *N);
 533
 534     SDValue foldSelectOfConstants(SDNode *N);
 535     SDValue foldVSelectOfConstants(SDNode *N);
 536     SDValue foldBinOpIntoSelect(SDNode *BO);
 537     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 538     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 539     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 540     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 541                              SDValue N2, SDValue N3, ISD::CondCode CC,
 542                              bool NotExtCompare = false);
 543     SDValue convertSelectOfFPConstantsToLoadOffset(
 544         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 545         ISD::CondCode CC);
 546     SDValue foldSignChangeInBitcast(SDNode *N);
 547     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 548                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 549     SDValue foldSelectOfBinops(SDNode *N);
 550     SDValue foldSextSetcc(SDNode *N);
 551     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 552                               const SDLoc &DL);
 553     SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
 554     SDValue unfoldMaskedMerge(SDNode *N);
 555     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 556     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 557                           const SDLoc &DL, bool foldBooleans);
 558     SDValue rebuildSetCC(SDValue N);
 559
 560     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 561                            SDValue &CC, bool MatchStrict = false) const;
 562     bool isOneUseSetCC(SDValue N) const;
 563
 564     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 565                                          unsigned HiOp);
 566     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 567     SDValue CombineExtLoad(SDNode *N);
 568     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 569     SDValue combineRepeatedFPDivisors(SDNode *N);
 570     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 571     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 572     SDValue BuildSDIV(SDNode *N);
 573     SDValue BuildSDIVPow2(SDNode *N);
 574     SDValue BuildUDIV(SDNode *N);
 575     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 576     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
 577     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 578     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 579     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 580     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 581                                 SDNodeFlags Flags, bool Reciprocal);
 582     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 583                                 SDNodeFlags Flags, bool Reciprocal);
 584     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 585                                bool DemandHighBits = true);
 586     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 587     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 588                               SDValue InnerPos, SDValue InnerNeg,
 589                               unsigned PosOpcode, unsigned NegOpcode,
 590                               const SDLoc &DL);
 591     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
 592                               SDValue InnerPos, SDValue InnerNeg,
 593                               unsigned PosOpcode, unsigned NegOpcode,
 594                               const SDLoc &DL);
 595     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 596     SDValue MatchLoadCombine(SDNode *N);
 597     SDValue mergeTruncStores(StoreSDNode *N);
 598     SDValue ReduceLoadWidth(SDNode *N);
 599     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 600     SDValue splitMergedValStore(StoreSDNode *ST);
 601     SDValue TransformFPLoadStorePair(SDNode *N);
 602     SDValue convertBuildVecZextToZext(SDNode *N);
 603     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 604     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
 605     SDValue reduceBuildVecToShuffle(SDNode *N);
 606     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 607                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 608                                   SDValue VecIn2, unsigned LeftIdx,
 609                                   bool DidSplitVec);
 610     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 611
 612     /// Walk up chain skipping non-aliasing memory nodes,
 613     /// looking for aliasing nodes and adding them to the Aliases vector.
 614     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 615                           SmallVectorImpl<SDValue> &Aliases);
 616
 617     /// Return true if there is any possibility that the two addresses overlap.
 618     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 619
 620     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 621     /// chain (aliasing node.)
 622     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 623
 624     /// Try to replace a store and any possibly adjacent stores on
 625     /// consecutive chains with better chains. Return true only if St is
 626     /// replaced.
 627     ///
 628     /// Notice that other chains may still be replaced even if the function
 629     /// returns false.
 630     bool findBetterNeighborChains(StoreSDNode *St);
 631
 632     // Helper for findBetterNeighborChains. Walk up store chain add additional
 633     // chained stores that do not overlap and can be parallelized.
 634     bool parallelizeChainedStores(StoreSDNode *St);
 635
  636     /// Pairs a pointer to an LSBaseSDNode with its position in a sequence of
  637     /// memory operations that are connected by a chain.
  638     struct MemOpLink {
  639       // Pointer to the memory node.
  640       LSBaseSDNode *MemNode;
  641
  642       // Offset of this operation from the base pointer.
  643       int64_t OffsetFromBase;
  644
  645       MemOpLink(LSBaseSDNode *N, int64_t Offset)
  646           : MemNode(N), OffsetFromBase(Offset) {}
  647     };
 648
 649     // Classify the origin of a stored value.
 650     enum class StoreSource { Unknown, Constant, Extract, Load };
 651     StoreSource getStoreSource(SDValue StoreVal) {
 652       switch (StoreVal.getOpcode()) {
 653       case ISD::Constant:
 654       case ISD::ConstantFP:
 655         return StoreSource::Constant;
 656       case ISD::EXTRACT_VECTOR_ELT:
 657       case ISD::EXTRACT_SUBVECTOR:
 658         return StoreSource::Extract;
 659       case ISD::LOAD:
 660         return StoreSource::Load;
 661       default:
 662         return StoreSource::Unknown;
 663       }
 664     }
 665
 666     /// This is a helper function for visitMUL to check the profitability
 667     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 668     /// MulNode is the original multiply, AddNode is (add x, c1),
 669     /// and ConstNode is c2.
 670     bool isMulAddWithConstProfitable(SDNode *MulNode,
 671                                      SDValue &AddNode,
 672                                      SDValue &ConstNode);
 673
 674     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 675     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 676     /// the type of the loaded value to be extended.
 677     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 678                           EVT LoadResultTy, EVT &ExtVT);
 679
 680     /// Helper function to calculate whether the given Load/Store can have its
 681     /// width reduced to ExtVT.
 682     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 683                            EVT &MemVT, unsigned ShAmt = 0);
 684
 685     /// Used by BackwardsPropagateMask to find suitable loads.
 686     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 687                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 688                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 689     /// Attempt to propagate a given AND node back to load leaves so that they
 690     /// can be combined into narrow loads.
 691     bool BackwardsPropagateMask(SDNode *N);
 692
 693     /// Helper function for mergeConsecutiveStores which merges the component
 694     /// store chains.
 695     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 696                                 unsigned NumStores);
 697
 698     /// This is a helper function for mergeConsecutiveStores. When the source
 699     /// elements of the consecutive stores are all constants or all extracted
 700     /// vector elements, try to merge them into one larger store introducing
 701     /// bitcasts if necessary.  \return True if a merged store was created.
 702     bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 703                                          EVT MemVT, unsigned NumStores,
 704                                          bool IsConstantSrc, bool UseVector,
 705                                          bool UseTrunc);
 706
 707     /// This is a helper function for mergeConsecutiveStores. Stores that
 708     /// potentially may be merged with St are placed in StoreNodes. RootNode is
 709     /// a chain predecessor to all store candidates.
 710     void getStoreMergeCandidates(StoreSDNode *St,
 711                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 712                                  SDNode *&Root);
 713
 714     /// Helper function for mergeConsecutiveStores. Checks if candidate stores
 715     /// have indirect dependency through their operands. RootNode is the
 716     /// predecessor to all stores calculated by getStoreMergeCandidates and is
 717     /// used to prune the dependency check. \return True if safe to merge.
 718     bool checkMergeStoreCandidatesForDependencies(
 719         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 720         SDNode *RootNode);
 721
 722     /// This is a helper function for mergeConsecutiveStores. Given a list of
 723     /// store candidates, find the first N that are consecutive in memory.
 724     /// Returns 0 if there are not at least 2 consecutive stores to try merging.
 725     unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
 726                                   int64_t ElementSizeBytes) const;
 727
 728     /// This is a helper function for mergeConsecutiveStores. It is used for
 729     /// store chains that are composed entirely of constant values.
 730     bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
 731                                   unsigned NumConsecutiveStores,
 732                                   EVT MemVT, SDNode *Root, bool AllowVectors);
 733
 734     /// This is a helper function for mergeConsecutiveStores. It is used for
 735     /// store chains that are composed entirely of extracted vector elements.
 736     /// When extracting multiple vector elements, try to store them in one
 737     /// vector store rather than a sequence of scalar stores.
 738     bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
 739                                  unsigned NumConsecutiveStores, EVT MemVT,
 740                                  SDNode *Root);
 741
 742     /// This is a helper function for mergeConsecutiveStores. It is used for
 743     /// store chains that are composed entirely of loaded values.
 744     bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
 745                               unsigned NumConsecutiveStores, EVT MemVT,
 746                               SDNode *Root, bool AllowVectors,
 747                               bool IsNonTemporalStore, bool IsNonTemporalLoad);
 748
 749     /// Merge consecutive store operations into a wide store.
 750     /// This optimization uses wide integers or vectors when possible.
 751     /// \return true if stores were merged.
 752     bool mergeConsecutiveStores(StoreSDNode *St);
 753
 754     /// Try to transform a truncation where C is a constant:
 755     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 756     ///
 757     /// \p N needs to be a truncation and its first operand an AND. Other
 758     /// requirements are checked by the function (e.g. that trunc is
 759     /// single-use) and if missed an empty SDValue is returned.
 760     SDValue distributeTruncateThroughAnd(SDNode *N);
 761
 762     /// Helper function to determine whether the target supports operation
 763     /// given by \p Opcode for type \p VT, that is, whether the operation
 764     /// is legal or custom before legalizing operations, and whether is
 765     /// legal (but not custom) after legalization.
 766     bool hasOperation(unsigned Opcode, EVT VT) {
 767       return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
 768     }
 769
 770   public:
 771     /// Runs the dag combiner on all nodes in the work list
 772     void Run(CombineLevel AtLevel);
 773
 774     SelectionDAG &getDAG() const { return DAG; }
 775
 776     /// Returns a type large enough to hold any valid shift amount - before type
 777     /// legalization these can be huge.
 778     EVT getShiftAmountTy(EVT LHSTy) {
 779       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 780       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 781     }
 782
 783     /// This method returns true if we are running before type legalization or
 784     /// if the specified VT is legal.
 785     bool isTypeLegal(const EVT &VT) {
 786       if (!LegalTypes) return true;
 787       return TLI.isTypeLegal(VT);
 788     }
 789
 790     /// Convenience wrapper around TargetLowering::getSetCCResultType
 791     EVT getSetCCResultType(EVT VT) const {
 792       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 793     }
 794
 795     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 796                          SDValue OrigLoad, SDValue ExtLoad,
 797                          ISD::NodeType ExtType);
 798   };
 799
 800 /// This class is a DAGUpdateListener that removes any deleted
 801 /// nodes from the worklist.
 802 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 803   DAGCombiner &DC;
 804
 805 public:
 806   explicit WorklistRemover(DAGCombiner &dc)
 807     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 808
 809   void NodeDeleted(SDNode *N, SDNode *E) override {
 810     DC.removeFromWorklist(N);
 811   }
 812 };
 813
 814 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 815   DAGCombiner &DC;
 816
 817 public:
 818   explicit WorklistInserter(DAGCombiner &dc)
 819       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 820
 821   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 822   //        compile time costs in large DAGs, e.g. Halide.
 823   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 824 };
 825
 826 } // end anonymous namespace
 827
 828 //===----------------------------------------------------------------------===//
 829 //  TargetLowering::DAGCombinerInfo implementation
 830 //===----------------------------------------------------------------------===//
 831
 832 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 833   ((DAGCombiner*)DC)->AddToWorklist(N);
 834 }
 835
 836 SDValue TargetLowering::DAGCombinerInfo::
 837 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 838   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 839 }
 840
 841 SDValue TargetLowering::DAGCombinerInfo::
 842 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 843   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 844 }
 845
 846 SDValue TargetLowering::DAGCombinerInfo::
 847 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 848   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 849 }
 850
 851 bool TargetLowering::DAGCombinerInfo::
 852 recursivelyDeleteUnusedNodes(SDNode *N) {
 853   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 854 }
 855
 856 void TargetLowering::DAGCombinerInfo::
 857 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 858   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 859 }
 860
 861 //===----------------------------------------------------------------------===//
 862 // Helper Functions
 863 //===----------------------------------------------------------------------===//
 864
 865 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 866   removeFromWorklist(N);
 867
 868   // If the operands of this node are only used by the node, they will now be
 869   // dead. Make sure to re-visit them and recursively delete dead nodes.
 870   for (const SDValue &Op : N->ops())
 871     // For an operand generating multiple values, one of the values may
 872     // become dead allowing further simplification (e.g. split index
 873     // arithmetic from an indexed load).
 874     if (Op->hasOneUse() || Op->getNumValues() > 1)
 875       AddToWorklist(Op.getNode());
 876
 877   DAG.DeleteNode(N);
 878 }
 879
 880 // APInts must be the same size for most operations, this helper
 881 // function zero extends the shorter of the pair so that they match.
 882 // We provide an Offset so that we can create bitwidths that won't overflow.
 883 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 884   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 885   LHS = LHS.zextOrSelf(Bits);
 886   RHS = RHS.zextOrSelf(Bits);
 887 }
 888
 889 // Return true if this node is a setcc, or is a select_cc
 890 // that selects between the target values used for true and false, making it
 891 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 892 // the appropriate nodes based on the type of node we are checking. This
 893 // simplifies life a bit for the callers.
 894 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 895                                     SDValue &CC, bool MatchStrict) const {
 896   if (N.getOpcode() == ISD::SETCC) {
 897     LHS = N.getOperand(0);
 898     RHS = N.getOperand(1);
 899     CC  = N.getOperand(2);
 900     return true;
 901   }
 902
 903   if (MatchStrict &&
 904       (N.getOpcode() == ISD::STRICT_FSETCC ||
 905        N.getOpcode() == ISD::STRICT_FSETCCS)) {
 906     LHS = N.getOperand(1);
 907     RHS = N.getOperand(2);
 908     CC  = N.getOperand(3);
 909     return true;
 910   }
 911
 912   if (N.getOpcode() != ISD::SELECT_CC ||
 913       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 914       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 915     return false;
 916
 917   if (TLI.getBooleanContents(N.getValueType()) ==
 918       TargetLowering::UndefinedBooleanContent)
 919     return false;
 920
 921   LHS = N.getOperand(0);
 922   RHS = N.getOperand(1);
 923   CC  = N.getOperand(4);
 924   return true;
 925 }
 926
 927 /// Return true if this is a SetCC-equivalent operation with only one use.
 928 /// If this is true, it allows the users to invert the operation for free when
 929 /// it is profitable to do so.
 930 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 931   SDValue N0, N1, N2;
 932   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 933     return true;
 934   return false;
 935 }
 936
 937 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
 938   if (!ScalarTy.isSimple())
 939     return false;
 940
 941   uint64_t MaskForTy = 0ULL;
 942   switch (ScalarTy.getSimpleVT().SimpleTy) {
 943   case MVT::i8:
 944     MaskForTy = 0xFFULL;
 945     break;
 946   case MVT::i16:
 947     MaskForTy = 0xFFFFULL;
 948     break;
 949   case MVT::i32:
 950     MaskForTy = 0xFFFFFFFFULL;
 951     break;
 952   default:
 953     return false;
 954     break;
 955   }
 956
 957   APInt Val;
 958   if (ISD::isConstantSplatVector(N, Val))
 959     return Val.getLimitedValue() == MaskForTy;
 960
 961   return false;
 962 }
 963
 964 // Determines if it is a constant integer or a splat/build vector of constant
 965 // integers (and undefs).
 966 // Do not permit build vector implicit truncation.
 967 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 968   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 969     return !(Const->isOpaque() && NoOpaques);
 970   if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
 971     return false;
 972   unsigned BitWidth = N.getScalarValueSizeInBits();
 973   for (const SDValue &Op : N->op_values()) {
 974     if (Op.isUndef())
 975       continue;
 976     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 977     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 978         (Const->isOpaque() && NoOpaques))
 979       return false;
 980   }
 981   return true;
 982 }
 983
 984 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 985 // undef's.
 986 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 987   if (V.getOpcode() != ISD::BUILD_VECTOR)
 988     return false;
 989   return isConstantOrConstantVector(V, NoOpaques) ||
 990          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 991 }
 992
 993 // Determine if this an indexed load with an opaque target constant index.
 994 static bool canSplitIdx(LoadSDNode *LD) {
 995   return MaySplitLoadIndex &&
 996          (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
 997           !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
 998 }
 999
1000 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001                                                              const SDLoc &DL,
1002                                                              SDValue N0,
1003                                                              SDValue N1) {
1004   // Currently this only tries to ensure we don't undo the GEP splits done by
1005   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006   // we check if the following transformation would be problematic:
1007   // (load/store (add, (add, x, offset1), offset2)) ->
1008   // (load/store (add, x, offset1+offset2)).
1009
1010   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011     return false;
1012
1013   if (N0.hasOneUse())
1014     return false;
1015
1016   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018   if (!C1 || !C2)
1019     return false;
1020
1021   const APInt &C1APIntVal = C1->getAPIntValue();
1022   const APInt &C2APIntVal = C2->getAPIntValue();
1023   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024     return false;
1025
1026   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027   if (CombinedValueIntVal.getBitWidth() > 64)
1028     return false;
1029   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030
1031   for (SDNode *Node : N0->uses()) {
1032     auto LoadStore = dyn_cast<MemSDNode>(Node);
1033     if (LoadStore) {
1034       // Is x[offset2] already not a legal addressing mode? If so then
1035       // reassociating the constants breaks nothing (we test offset2 because
1036       // that's the one we hope to fold into the load or store).
1037       TargetLoweringBase::AddrMode AM;
1038       AM.HasBaseReg = true;
1039       AM.BaseOffs = C2APIntVal.getSExtValue();
1040       EVT VT = LoadStore->getMemoryVT();
1041       unsigned AS = LoadStore->getAddressSpace();
1042       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044         continue;
1045
1046       // Would x[offset1+offset2] still be a legal addressing mode?
1047       AM.BaseOffs = CombinedValue;
1048       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049         return true;
1050     }
1051   }
1052
1053   return false;
1054 }
1055
1056 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059                                                SDValue N0, SDValue N1) {
1060   EVT VT = N0.getValueType();
1061
1062   if (N0.getOpcode() != Opc)
1063     return SDValue();
1064
1065   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068       if (SDValue OpNode =
1069               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071       return SDValue();
1072     }
1073     if (N0.hasOneUse()) {
1074       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075       //              iff (op x, c1) has one use
1076       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077       if (!OpNode.getNode())
1078         return SDValue();
1079       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080     }
1081   }
1082   return SDValue();
1083 }
1084
1085 // Try to reassociate commutative binops.
1086 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087                                     SDValue N1, SDNodeFlags Flags) {
1088   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089
1090   // Floating-point reassociation is not allowed without loose FP math.
1091   if (N0.getValueType().isFloatingPoint() ||
1092       N1.getValueType().isFloatingPoint())
1093     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094       return SDValue();
1095
1096   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097     return Combined;
1098   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099     return Combined;
1100   return SDValue();
1101 }
1102
1103 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104                                bool AddTo) {
1105   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106   ++NodesCombined;
1107   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1108              To[0].getNode()->dump(&DAG);
1109              dbgs() << " and " << NumTo - 1 << " other values\n");
1110   for (unsigned i = 0, e = NumTo; i != e; ++i)
1111     assert((!To[i].getNode() ||
1112             N->getValueType(i) == To[i].getValueType()) &&
1113            "Cannot combine value to value of different type!");
1114
1115   WorklistRemover DeadNodes(*this);
1116   DAG.ReplaceAllUsesWith(N, To);
1117   if (AddTo) {
1118     // Push the new nodes and any users onto the worklist
1119     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120       if (To[i].getNode()) {
1121         AddToWorklist(To[i].getNode());
1122         AddUsersToWorklist(To[i].getNode());
1123       }
1124     }
1125   }
1126
1127   // Finally, if the node is now dead, remove it from the graph.  The node
1128   // may not be dead if the replacement process recursively simplified to
1129   // something else needing this node.
1130   if (N->use_empty())
1131     deleteAndRecombine(N);
1132   return SDValue(N, 0);
1133 }
1134
1135 void DAGCombiner::
1136 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137   // Replace the old value with the new one.
1138   ++NodesCombined;
1139   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1140              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1141              dbgs() << '\n');
1142
1143   // Replace all uses.  If any nodes become isomorphic to other nodes and
1144   // are deleted, make sure to remove them from our worklist.
1145   WorklistRemover DeadNodes(*this);
1146   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147
1148   // Push the new node and any (possibly new) users onto the worklist.
1149   AddToWorklistWithUsers(TLO.New.getNode());
1150
1151   // Finally, if the node is now dead, remove it from the graph.  The node
1152   // may not be dead if the replacement process recursively simplified to
1153   // something else needing this node.
1154   if (TLO.Old.getNode()->use_empty())
1155     deleteAndRecombine(TLO.Old.getNode());
1156 }
1157
1158 /// Check the specified integer node value to see if it can be simplified or if
1159 /// things it uses can be simplified by bit propagation. If so, return true.
1160 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161                                        const APInt &DemandedElts,
1162                                        bool AssumeSingleUse) {
1163   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164   KnownBits Known;
1165   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166                                 AssumeSingleUse))
1167     return false;
1168
1169   // Revisit the node.
1170   AddToWorklist(Op.getNode());
1171
1172   CommitTargetLoweringOpt(TLO);
1173   return true;
1174 }
1175
1176 /// Check the specified vector node value to see if it can be simplified or
1177 /// if things it uses can be simplified as it only uses some of the elements.
1178 /// If so, return true.
1179 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180                                              const APInt &DemandedElts,
1181                                              bool AssumeSingleUse) {
1182   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183   APInt KnownUndef, KnownZero;
1184   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185                                       TLO, 0, AssumeSingleUse))
1186     return false;
1187
1188   // Revisit the node.
1189   AddToWorklist(Op.getNode());
1190
1191   CommitTargetLoweringOpt(TLO);
1192   return true;
1193 }
1194
1195 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196   SDLoc DL(Load);
1197   EVT VT = Load->getValueType(0);
1198   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199
1200   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202   WorklistRemover DeadNodes(*this);
1203   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205   deleteAndRecombine(Load);
1206   AddToWorklist(Trunc.getNode());
1207 }
1208
1209 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210   Replace = false;
1211   SDLoc DL(Op);
1212   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213     LoadSDNode *LD = cast<LoadSDNode>(Op);
1214     EVT MemVT = LD->getMemoryVT();
1215     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216                                                       : LD->getExtensionType();
1217     Replace = true;
1218     return DAG.getExtLoad(ExtType, DL, PVT,
1219                           LD->getChain(), LD->getBasePtr(),
1220                           MemVT, LD->getMemOperand());
1221   }
1222
1223   unsigned Opc = Op.getOpcode();
1224   switch (Opc) {
1225   default: break;
1226   case ISD::AssertSext:
1227     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229     break;
1230   case ISD::AssertZext:
1231     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233     break;
1234   case ISD::Constant: {
1235     unsigned ExtOpc =
1236       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237     return DAG.getNode(ExtOpc, DL, PVT, Op);
1238   }
1239   }
1240
1241   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242     return SDValue();
1243   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244 }
1245
1246 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248     return SDValue();
1249   EVT OldVT = Op.getValueType();
1250   SDLoc DL(Op);
1251   bool Replace = false;
1252   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253   if (!NewOp.getNode())
1254     return SDValue();
1255   AddToWorklist(NewOp.getNode());
1256
1257   if (Replace)
1258     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260                      DAG.getValueType(OldVT));
1261 }
1262
1263 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264   EVT OldVT = Op.getValueType();
1265   SDLoc DL(Op);
1266   bool Replace = false;
1267   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268   if (!NewOp.getNode())
1269     return SDValue();
1270   AddToWorklist(NewOp.getNode());
1271
1272   if (Replace)
1273     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275 }
1276
1277 /// Promote the specified integer binary operation if the target indicates it is
1278 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279 /// i32 since i16 instructions are longer.
1280 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281   if (!LegalOperations)
1282     return SDValue();
1283
1284   EVT VT = Op.getValueType();
1285   if (VT.isVector() || !VT.isInteger())
1286     return SDValue();
1287
1288   // If operation type is 'undesirable', e.g. i16 on x86, consider
1289   // promoting it.
1290   unsigned Opc = Op.getOpcode();
1291   if (TLI.isTypeDesirableForOp(Opc, VT))
1292     return SDValue();
1293
1294   EVT PVT = VT;
1295   // Consult target whether it is a good idea to promote this operation and
1296   // what's the right type to promote it to.
1297   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298     assert(PVT != VT && "Don't know what type to promote to!");
1299
1300     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301
1302     bool Replace0 = false;
1303     SDValue N0 = Op.getOperand(0);
1304     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305
1306     bool Replace1 = false;
1307     SDValue N1 = Op.getOperand(1);
1308     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309     SDLoc DL(Op);
1310
1311     SDValue RV =
1312         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313
1314     // We are always replacing N0/N1's use in N and only need additional
1315     // replacements if there are additional uses.
1316     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317     //       (SDValue) here because the node may reference multiple values
1318     //       (for example, the chain value of a load node).
1319     Replace0 &= !N0->hasOneUse();
1320     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321
1322     // Combine Op here so it is preserved past replacements.
1323     CombineTo(Op.getNode(), RV);
1324
1325     // If operands have a use ordering, make sure we deal with
1326     // predecessor first.
1327     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328       std::swap(N0, N1);
1329       std::swap(NN0, NN1);
1330     }
1331
1332     if (Replace0) {
1333       AddToWorklist(NN0.getNode());
1334       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335     }
1336     if (Replace1) {
1337       AddToWorklist(NN1.getNode());
1338       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339     }
1340     return Op;
1341   }
1342   return SDValue();
1343 }
1344
1345 /// Promote the specified integer shift operation if the target indicates it is
1346 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347 /// i32 since i16 instructions are longer.
1348 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349   if (!LegalOperations)
1350     return SDValue();
1351
1352   EVT VT = Op.getValueType();
1353   if (VT.isVector() || !VT.isInteger())
1354     return SDValue();
1355
1356   // If operation type is 'undesirable', e.g. i16 on x86, consider
1357   // promoting it.
1358   unsigned Opc = Op.getOpcode();
1359   if (TLI.isTypeDesirableForOp(Opc, VT))
1360     return SDValue();
1361
1362   EVT PVT = VT;
1363   // Consult target whether it is a good idea to promote this operation and
1364   // what's the right type to promote it to.
1365   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366     assert(PVT != VT && "Don't know what type to promote to!");
1367
1368     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369
1370     bool Replace = false;
1371     SDValue N0 = Op.getOperand(0);
1372     SDValue N1 = Op.getOperand(1);
1373     if (Opc == ISD::SRA)
1374       N0 = SExtPromoteOperand(N0, PVT);
1375     else if (Opc == ISD::SRL)
1376       N0 = ZExtPromoteOperand(N0, PVT);
1377     else
1378       N0 = PromoteOperand(N0, PVT, Replace);
1379
1380     if (!N0.getNode())
1381       return SDValue();
1382
1383     SDLoc DL(Op);
1384     SDValue RV =
1385         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386
1387     if (Replace)
1388       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389
1390     // Deal with Op being deleted.
1391     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392       return RV;
1393   }
1394   return SDValue();
1395 }
1396
1397 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398   if (!LegalOperations)
1399     return SDValue();
1400
1401   EVT VT = Op.getValueType();
1402   if (VT.isVector() || !VT.isInteger())
1403     return SDValue();
1404
1405   // If operation type is 'undesirable', e.g. i16 on x86, consider
1406   // promoting it.
1407   unsigned Opc = Op.getOpcode();
1408   if (TLI.isTypeDesirableForOp(Opc, VT))
1409     return SDValue();
1410
1411   EVT PVT = VT;
1412   // Consult target whether it is a good idea to promote this operation and
1413   // what's the right type to promote it to.
1414   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415     assert(PVT != VT && "Don't know what type to promote to!");
1416     // fold (aext (aext x)) -> (aext x)
1417     // fold (aext (zext x)) -> (zext x)
1418     // fold (aext (sext x)) -> (sext x)
1419     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421   }
1422   return SDValue();
1423 }
1424
1425 bool DAGCombiner::PromoteLoad(SDValue Op) {
1426   if (!LegalOperations)
1427     return false;
1428
1429   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430     return false;
1431
1432   EVT VT = Op.getValueType();
1433   if (VT.isVector() || !VT.isInteger())
1434     return false;
1435
1436   // If operation type is 'undesirable', e.g. i16 on x86, consider
1437   // promoting it.
1438   unsigned Opc = Op.getOpcode();
1439   if (TLI.isTypeDesirableForOp(Opc, VT))
1440     return false;
1441
1442   EVT PVT = VT;
1443   // Consult target whether it is a good idea to promote this operation and
1444   // what's the right type to promote it to.
1445   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446     assert(PVT != VT && "Don't know what type to promote to!");
1447
1448     SDLoc DL(Op);
1449     SDNode *N = Op.getNode();
1450     LoadSDNode *LD = cast<LoadSDNode>(N);
1451     EVT MemVT = LD->getMemoryVT();
1452     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453                                                       : LD->getExtensionType();
1454     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455                                    LD->getChain(), LD->getBasePtr(),
1456                                    MemVT, LD->getMemOperand());
1457     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458
1459     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460                Result.getNode()->dump(&DAG); dbgs() << '\n');
1461     WorklistRemover DeadNodes(*this);
1462     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464     deleteAndRecombine(N);
1465     AddToWorklist(Result.getNode());
1466     return true;
1467   }
1468   return false;
1469 }
1470
1471 /// Recursively delete a node which has no uses and any operands for
1472 /// which it is the only use.
1473 ///
1474 /// Note that this both deletes the nodes and removes them from the worklist.
1475 /// It also adds any nodes who have had a user deleted to the worklist as they
1476 /// may now have only one use and subject to other combines.
1477 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478   if (!N->use_empty())
1479     return false;
1480
1481   SmallSetVector<SDNode *, 16> Nodes;
1482   Nodes.insert(N);
1483   do {
1484     N = Nodes.pop_back_val();
1485     if (!N)
1486       continue;
1487
1488     if (N->use_empty()) {
1489       for (const SDValue &ChildN : N->op_values())
1490         Nodes.insert(ChildN.getNode());
1491
1492       removeFromWorklist(N);
1493       DAG.DeleteNode(N);
1494     } else {
1495       AddToWorklist(N);
1496     }
1497   } while (!Nodes.empty());
1498   return true;
1499 }
1500
1501 //===----------------------------------------------------------------------===//
1502 //  Main DAG Combiner implementation
1503 //===----------------------------------------------------------------------===//
1504
/// Main driver of the combiner: seeds the worklist with every node in the DAG
/// and repeatedly combines worklist entries until the worklist is exhausted,
/// then updates the DAG root and removes dead nodes.
///
/// \param AtLevel the combine level; it determines the LegalDAG,
/// LegalOperations and LegalTypes flags consulted by the visit routines.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Everything the legalizer reported as updated gets revisited, together
      // with its users.
      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // LegalizeOp reported that N is no longer valid; do not combine it.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // Replace node-for-node when both define the same number of values;
    // otherwise N must define exactly one value, which RV replaces.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1605
/// Dispatch \p N to the visit routine that handles its opcode.
///
/// Several opcodes share one routine (e.g. the saturating adds, the
/// min/max family, the rotates and the vector reductions). Opcodes without a
/// case fall through to the default and yield a null SDValue.
///
/// \returns whatever the selected visit routine returns, or a null SDValue
/// when no routine is registered for the opcode.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  // No visit routine exists for this opcode.
  return SDValue();
}
1744
1745 SDValue DAGCombiner::combine(SDNode *N) {
1746   SDValue RV;
1747   if (!DisableGenericCombines)
1748     RV = visit(N);
1749
1750   // If nothing happened, try a target-specific DAG combine.
1751   if (!RV.getNode()) {
1752     assert(N->getOpcode() != ISD::DELETED_NODE &&
1753            "Node was deleted but visit returned NULL!");
1754
1755     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757
1758       // Expose the DAG combiner to the target combiner impls.
1759       TargetLowering::DAGCombinerInfo
1760         DagCombineInfo(DAG, Level, false, this);
1761
1762       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763     }
1764   }
1765
1766   // If nothing happened still, try promoting the operation.
1767   if (!RV.getNode()) {
1768     switch (N->getOpcode()) {
1769     default: break;
1770     case ISD::ADD:
1771     case ISD::SUB:
1772     case ISD::MUL:
1773     case ISD::AND:
1774     case ISD::OR:
1775     case ISD::XOR:
1776       RV = PromoteIntBinOp(SDValue(N, 0));
1777       break;
1778     case ISD::SHL:
1779     case ISD::SRA:
1780     case ISD::SRL:
1781       RV = PromoteIntShiftOp(SDValue(N, 0));
1782       break;
1783     case ISD::SIGN_EXTEND:
1784     case ISD::ZERO_EXTEND:
1785     case ISD::ANY_EXTEND:
1786       RV = PromoteExtend(SDValue(N, 0));
1787       break;
1788     case ISD::LOAD:
1789       if (PromoteLoad(SDValue(N, 0)))
1790         RV = SDValue(N, 0);
1791       break;
1792     }
1793   }
1794
1795   // If N is a commutative binary node, try to eliminate it if the commuted
1796   // version is already present in the DAG.
1797   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798       N->getNumValues() == 1) {
1799     SDValue N0 = N->getOperand(0);
1800     SDValue N1 = N->getOperand(1);
1801
1802     // Constant operands are canonicalized to RHS.
1803     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804       SDValue Ops[] = {N1, N0};
1805       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806                                             N->getFlags());
1807       if (CSENode)
1808         return SDValue(CSENode, 0);
1809     }
1810   }
1811
1812   return RV;
1813 }
1814
1815 /// Given a node, return its input chain if it has one, otherwise return a null
1816 /// sd operand.
1817 static SDValue getInputChainForNode(SDNode *N) {
1818   if (unsigned NumOps = N->getNumOperands()) {
1819     if (N->getOperand(0).getValueType() == MVT::Other)
1820       return N->getOperand(0);
1821     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822       return N->getOperand(NumOps-1);
1823     for (unsigned i = 1; i < NumOps-1; ++i)
1824       if (N->getOperand(i).getValueType() == MVT::Other)
1825         return N->getOperand(i);
1826   }
1827   return SDValue();
1828 }
1829
/// Simplify a TokenFactor node.
///
/// The routine (1) drops a redundant operand in the two-operand case,
/// (2) inlines single-use TokenFactor operands and removes EntryToken and
/// duplicate operands, and (3) prunes operands that are reachable through the
/// chain of another operand.
///
/// \returns the replacement chain value, or a null SDValue if nothing changed
/// or simplification is skipped (optnone, or too many operands).
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        // A TokenFactor that has other users, or that is already queued, is
        // treated like any other operand.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes reached by the walk.
  bool DidPruneOps = false;

  // Seed the search with one worklist entry per operand.
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Note: this local lambda shadows the combiner's AddToWorklist that is used
  // earlier in this function; from here on the name refers to the lambda.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.contains(Op)) {
      Changed = true;
      DidPruneOps = true;
      // Find the index of Op within Ops.
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // Transfer the outstanding work of the pruned operand to OpNumber.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk is capped at 1024 worklist entries.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // For these nodes the walk continues through operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes are followed through their chain; the walk stops at any
      // other kind of node.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking the
        // chains of the other operands.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
2017
2018 /// MERGE_VALUES can always be eliminated.
2019 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020   WorklistRemover DeadNodes(*this);
2021   // Replacing results may cause a different MERGE_VALUES to suddenly
2022   // be CSE'd with N, and carry its uses with it. Iterate until no
2023   // uses remain, to ensure that the node can be safely deleted.
2024   // First add the users of this node to the work list so that they
2025   // can be tried again once they have new operands.
2026   AddUsersToWorklist(N);
2027   do {
2028     // Do as a single replacement to avoid rewalking use lists.
2029     SmallVector<SDValue, 8> Ops;
2030     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031       Ops.push_back(N->getOperand(i));
2032     DAG.ReplaceAllUsesWith(N, Ops.data());
2033   } while (!N->use_empty());
2034   deleteAndRecombine(N);
2035   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2036 }
2037
2038 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2039 /// ConstantSDNode pointer else nullptr.
2040 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043 }
2044
2045 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2046 /// and that N may be folded in the load / store addressing mode.
2047 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048                                     const TargetLowering &TLI) {
2049   EVT VT;
2050   unsigned AS;
2051
2052   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054       return false;
2055     VT = LD->getMemoryVT();
2056     AS = LD->getAddressSpace();
2057   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059       return false;
2060     VT = ST->getMemoryVT();
2061     AS = ST->getAddressSpace();
2062   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064       return false;
2065     VT = LD->getMemoryVT();
2066     AS = LD->getAddressSpace();
2067   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069       return false;
2070     VT = ST->getMemoryVT();
2071     AS = ST->getAddressSpace();
2072   } else
2073     return false;
2074
2075   TargetLowering::AddrMode AM;
2076   if (N->getOpcode() == ISD::ADD) {
2077     AM.HasBaseReg = true;
2078     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079     if (Offset)
2080       // [reg +/- imm]
2081       AM.BaseOffs = Offset->getSExtValue();
2082     else
2083       // [reg +/- reg]
2084       AM.Scale = 1;
2085   } else if (N->getOpcode() == ISD::SUB) {
2086     AM.HasBaseReg = true;
2087     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088     if (Offset)
2089       // [reg +/- imm]
2090       AM.BaseOffs = -Offset->getSExtValue();
2091     else
2092       // [reg +/- reg]
2093       AM.Scale = 1;
2094   } else
2095     return false;
2096
2097   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2099 }
2100
2101 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103          "Unexpected binary operator");
2104
2105   // Don't do this unless the old select is going away. We want to eliminate the
2106   // binary operator, not replace a binop with a select.
2107   // TODO: Handle ISD::SELECT_CC.
2108   unsigned SelOpNo = 0;
2109   SDValue Sel = BO->getOperand(0);
2110   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111     SelOpNo = 1;
2112     Sel = BO->getOperand(1);
2113   }
2114
2115   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116     return SDValue();
2117
2118   SDValue CT = Sel.getOperand(1);
2119   if (!isConstantOrConstantVector(CT, true) &&
2120       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121     return SDValue();
2122
2123   SDValue CF = Sel.getOperand(2);
2124   if (!isConstantOrConstantVector(CF, true) &&
2125       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126     return SDValue();
2127
2128   // Bail out if any constants are opaque because we can't constant fold those.
2129   // The exception is "and" and "or" with either 0 or -1 in which case we can
2130   // propagate non constant operands into select. I.e.:
2131   // and (select Cond, 0, -1), X --> select Cond, 0, X
2132   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133   auto BinOpcode = BO->getOpcode();
2134   bool CanFoldNonConst =
2135       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138
2139   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140   if (!CanFoldNonConst &&
2141       !isConstantOrConstantVector(CBO, true) &&
2142       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143     return SDValue();
2144
2145   EVT VT = BO->getValueType(0);
2146
2147   // We have a select-of-constants followed by a binary operator with a
2148   // constant. Eliminate the binop by pulling the constant math into the select.
2149   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150   SDLoc DL(Sel);
2151   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153   if (!CanFoldNonConst && !NewCT.isUndef() &&
2154       !isConstantOrConstantVector(NewCT, true) &&
2155       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156     return SDValue();
2157
2158   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160   if (!CanFoldNonConst && !NewCF.isUndef() &&
2161       !isConstantOrConstantVector(NewCF, true) &&
2162       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163     return SDValue();
2164
2165   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166   SelectOp->setFlags(BO->getFlags());
2167   return SelectOp;
2168 }
2169
2170 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172          "Expecting add or sub");
2173
2174   // Match a constant operand and a zext operand for the math instruction:
2175   // add Z, C
2176   // sub C, Z
2177   bool IsAdd = N->getOpcode() == ISD::ADD;
2178   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180   auto *CN = dyn_cast<ConstantSDNode>(C);
2181   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182     return SDValue();
2183
2184   // Match the zext operand as a setcc of a boolean.
2185   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186       Z.getOperand(0).getValueType() != MVT::i1)
2187     return SDValue();
2188
2189   // Match the compare as: setcc (X & 1), 0, eq.
2190   SDValue SetCC = Z.getOperand(0);
2191   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195     return SDValue();
2196
2197   // We are adding/subtracting a constant and an inverted low bit. Turn that
2198   // into a subtract/add of the low bit with incremented/decremented constant:
2199   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201   EVT VT = C.getValueType();
2202   SDLoc DL(N);
2203   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207 }
2208
2209 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2210 /// a shift and add with a different constant.
2211 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213          "Expecting add or sub");
2214
2215   // We need a constant operand for the add/sub, and the other operand is a
2216   // logical shift right: add (srl), C or sub C, (srl).
2217   bool IsAdd = N->getOpcode() == ISD::ADD;
2218   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221       ShiftOp.getOpcode() != ISD::SRL)
2222     return SDValue();
2223
2224   // The shift must be of a 'not' value.
2225   SDValue Not = ShiftOp.getOperand(0);
2226   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227     return SDValue();
2228
2229   // The shift must be moving the sign bit to the least-significant-bit.
2230   EVT VT = ShiftOp.getValueType();
2231   SDValue ShAmt = ShiftOp.getOperand(1);
2232   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234     return SDValue();
2235
2236   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239   SDLoc DL(N);
2240   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242   if (SDValue NewC =
2243           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246   return SDValue();
2247 }
2248
2249 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251 /// are no common bits set in the operands).
2252 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253   SDValue N0 = N->getOperand(0);
2254   SDValue N1 = N->getOperand(1);
2255   EVT VT = N0.getValueType();
2256   SDLoc DL(N);
2257
2258   // fold vector ops
2259   if (VT.isVector()) {
2260     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261       return FoldedVOp;
2262
2263     // fold (add x, 0) -> x, vector edition
2264     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265       return N0;
2266     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267       return N1;
2268   }
2269
2270   // fold (add x, undef) -> undef
2271   if (N0.isUndef())
2272     return N0;
2273
2274   if (N1.isUndef())
2275     return N1;
2276
2277   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278     // canonicalize constant to RHS
2279     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281     // fold (add c1, c2) -> c1+c2
2282     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283   }
2284
2285   // fold (add x, 0) -> x
2286   if (isNullConstant(N1))
2287     return N0;
2288
2289   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290     // fold ((A-c1)+c2) -> (A+(c2-c1))
2291     if (N0.getOpcode() == ISD::SUB &&
2292         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293       SDValue Sub =
2294           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295       assert(Sub && "Constant folding failed");
2296       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297     }
2298
2299     // fold ((c1-A)+c2) -> (c1+c2)-A
2300     if (N0.getOpcode() == ISD::SUB &&
2301         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302       SDValue Add =
2303           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304       assert(Add && "Constant folding failed");
2305       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306     }
2307
2308     // add (sext i1 X), 1 -> zext (not i1 X)
2309     // We don't transform this pattern:
2310     //   add (zext i1 X), -1 -> sext (not i1 X)
2311     // because most (?) targets generate better code for the zext form.
2312     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313         isOneOrOneSplat(N1)) {
2314       SDValue X = N0.getOperand(0);
2315       if ((!LegalOperations ||
2316            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318           X.getScalarValueSizeInBits() == 1) {
2319         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321       }
2322     }
2323
2324     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325     // equivalent to (add x, c0).
2326     if (N0.getOpcode() == ISD::OR &&
2327         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330                                                     {N1, N0.getOperand(1)}))
2331         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332     }
2333   }
2334
2335   if (SDValue NewSel = foldBinOpIntoSelect(N))
2336     return NewSel;
2337
2338   // reassociate add
2339   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341       return RADD;
2342
2343     // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344     // equivalent to (add x, c).
2345     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346       if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348           DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349         return DAG.getNode(ISD::ADD, DL, VT,
2350                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351                            N0.getOperand(1));
2352       }
2353       return SDValue();
2354     };
2355     if (SDValue Add = ReassociateAddOr(N0, N1))
2356       return Add;
2357     if (SDValue Add = ReassociateAddOr(N1, N0))
2358       return Add;
2359   }
2360   // fold ((0-A) + B) -> B-A
2361   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363
2364   // fold (A + (0-B)) -> A-B
2365   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367
2368   // fold (A+(B-A)) -> B
2369   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370     return N1.getOperand(0);
2371
2372   // fold ((B-A)+A) -> B
2373   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374     return N0.getOperand(0);
2375
2376   // fold ((A-B)+(C-A)) -> (C-B)
2377   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378       N0.getOperand(0) == N1.getOperand(1))
2379     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380                        N0.getOperand(1));
2381
2382   // fold ((A-B)+(B-C)) -> (A-C)
2383   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384       N0.getOperand(1) == N1.getOperand(0))
2385     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386                        N1.getOperand(1));
2387
2388   // fold (A+(B-(A+C))) to (B-C)
2389   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390       N0 == N1.getOperand(1).getOperand(0))
2391     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392                        N1.getOperand(1).getOperand(1));
2393
2394   // fold (A+(B-(C+A))) to (B-C)
2395   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396       N0 == N1.getOperand(1).getOperand(1))
2397     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398                        N1.getOperand(1).getOperand(0));
2399
2400   // fold (A+((B-A)+or-C)) to (B+or-C)
2401   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402       N1.getOperand(0).getOpcode() == ISD::SUB &&
2403       N0 == N1.getOperand(0).getOperand(1))
2404     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405                        N1.getOperand(1));
2406
2407   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409     SDValue N00 = N0.getOperand(0);
2410     SDValue N01 = N0.getOperand(1);
2411     SDValue N10 = N1.getOperand(0);
2412     SDValue N11 = N1.getOperand(1);
2413
2414     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415       return DAG.getNode(ISD::SUB, DL, VT,
2416                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418   }
2419
2420   // fold (add (umax X, C), -C) --> (usubsat X, C)
2421   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423       return (!Max && !Op) ||
2424              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425     };
2426     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427                                   /*AllowUndefs*/ true))
2428       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429                          N0.getOperand(1));
2430   }
2431
2432   if (SimplifyDemandedBits(SDValue(N, 0)))
2433     return SDValue(N, 0);
2434
2435   if (isOneOrOneSplat(N1)) {
2436     // fold (add (xor a, -1), 1) -> (sub 0, a)
2437     if (isBitwiseNot(N0))
2438       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439                          N0.getOperand(0));
2440
2441     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442     if (N0.getOpcode() == ISD::ADD ||
2443         N0.getOpcode() == ISD::UADDO ||
2444         N0.getOpcode() == ISD::SADDO) {
2445       SDValue A, Xor;
2446
2447       if (isBitwiseNot(N0.getOperand(0))) {
2448         A = N0.getOperand(1);
2449         Xor = N0.getOperand(0);
2450       } else if (isBitwiseNot(N0.getOperand(1))) {
2451         A = N0.getOperand(0);
2452         Xor = N0.getOperand(1);
2453       }
2454
2455       if (Xor)
2456         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2457     }
2458
2459     // Look for:
2460     //   add (add x, y), 1
2461     // And if the target does not like this form then turn into:
2462     //   sub y, (xor x, -1)
2463     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2464         N0.getOpcode() == ISD::ADD) {
2465       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2466                                 DAG.getAllOnesConstant(DL, VT));
2467       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2468     }
2469   }
2470
2471   // (x - y) + -1  ->  add (xor y, -1), x
2472   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2473       isAllOnesOrAllOnesSplat(N1)) {
2474     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2475     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2476   }
2477
2478   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2479     return Combined;
2480
2481   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2482     return Combined;
2483
2484   return SDValue();
2485 }
2486
2487 SDValue DAGCombiner::visitADD(SDNode *N) {
2488   SDValue N0 = N->getOperand(0);
2489   SDValue N1 = N->getOperand(1);
2490   EVT VT = N0.getValueType();
2491   SDLoc DL(N);
2492
2493   if (SDValue Combined = visitADDLike(N))
2494     return Combined;
2495
2496   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2497     return V;
2498
2499   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2500     return V;
2501
2502   // fold (a+b) -> (a|b) iff a and b share no bits.
2503   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2504       DAG.haveNoCommonBitsSet(N0, N1))
2505     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2506
2507   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2508   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2509     const APInt &C0 = N0->getConstantOperandAPInt(0);
2510     const APInt &C1 = N1->getConstantOperandAPInt(0);
2511     return DAG.getVScale(DL, VT, C0 + C1);
2512   }
2513
2514   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2515   if ((N0.getOpcode() == ISD::ADD) &&
2516       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2517       (N1.getOpcode() == ISD::VSCALE)) {
2518     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2519     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2520     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2521     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2522   }
2523
2524   // Fold (add step_vector(c1), step_vector(c2)  to step_vector(c1+c2))
2525   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2526       N1.getOpcode() == ISD::STEP_VECTOR) {
2527     const APInt &C0 = N0->getConstantOperandAPInt(0);
2528     const APInt &C1 = N1->getConstantOperandAPInt(0);
2529     APInt NewStep = C0 + C1;
2530     return DAG.getStepVector(DL, VT, NewStep);
2531   }
2532
2533   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2534   if ((N0.getOpcode() == ISD::ADD) &&
2535       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2536       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2537     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2538     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2539     APInt NewStep = SV0 + SV1;
2540     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2541     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2542   }
2543
2544   return SDValue();
2545 }
2546
2547 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2548   unsigned Opcode = N->getOpcode();
2549   SDValue N0 = N->getOperand(0);
2550   SDValue N1 = N->getOperand(1);
2551   EVT VT = N0.getValueType();
2552   SDLoc DL(N);
2553
2554   // fold vector ops
2555   if (VT.isVector()) {
2556     // TODO SimplifyVBinOp
2557
2558     // fold (add_sat x, 0) -> x, vector edition
2559     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2560       return N0;
2561     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2562       return N1;
2563   }
2564
2565   // fold (add_sat x, undef) -> -1
2566   if (N0.isUndef() || N1.isUndef())
2567     return DAG.getAllOnesConstant(DL, VT);
2568
2569   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2570     // canonicalize constant to RHS
2571     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2572       return DAG.getNode(Opcode, DL, VT, N1, N0);
2573     // fold (add_sat c1, c2) -> c3
2574     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2575   }
2576
2577   // fold (add_sat x, 0) -> x
2578   if (isNullConstant(N1))
2579     return N0;
2580
2581   // If it cannot overflow, transform into an add.
2582   if (Opcode == ISD::UADDSAT)
2583     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2584       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2585
2586   return SDValue();
2587 }
2588
2589 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2590   bool Masked = false;
2591
2592   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2593   while (true) {
2594     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2595       V = V.getOperand(0);
2596       continue;
2597     }
2598
2599     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2600       Masked = true;
2601       V = V.getOperand(0);
2602       continue;
2603     }
2604
2605     break;
2606   }
2607
2608   // If this is not a carry, return.
2609   if (V.getResNo() != 1)
2610     return SDValue();
2611
2612   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2613       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2614     return SDValue();
2615
2616   EVT VT = V.getNode()->getValueType(0);
2617   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2618     return SDValue();
2619
2620   // If the result is masked, then no matter what kind of bool it is we can
2621   // return. If it isn't, then we need to make sure the bool type is either 0 or
2622   // 1 and not other values.
2623   if (Masked ||
2624       TLI.getBooleanContents(V.getValueType()) ==
2625           TargetLoweringBase::ZeroOrOneBooleanContent)
2626     return V;
2627
2628   return SDValue();
2629 }
2630
2631 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2632 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2633 /// the opcode and bypass the mask operation.
2634 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2635                                  SelectionDAG &DAG, const SDLoc &DL) {
2636   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2637     return SDValue();
2638
2639   EVT VT = N0.getValueType();
2640   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2641     return SDValue();
2642
2643   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2644   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2645   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2646 }
2647
2648 /// Helper for doing combines based on N0 and N1 being added to each other.
2649 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2650                                           SDNode *LocReference) {
2651   EVT VT = N0.getValueType();
2652   SDLoc DL(LocReference);
2653
2654   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2655   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2656       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2657     return DAG.getNode(ISD::SUB, DL, VT, N0,
2658                        DAG.getNode(ISD::SHL, DL, VT,
2659                                    N1.getOperand(0).getOperand(1),
2660                                    N1.getOperand(1)));
2661
2662   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2663     return V;
2664
2665   // Look for:
2666   //   add (add x, 1), y
2667   // And if the target does not like this form then turn into:
2668   //   sub y, (xor x, -1)
2669   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2670       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2671     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2672                               DAG.getAllOnesConstant(DL, VT));
2673     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2674   }
2675
2676   // Hoist one-use subtraction by non-opaque constant:
2677   //   (x - C) + y  ->  (x + y) - C
2678   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2679   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2680       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2681     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2682     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2683   }
2684   // Hoist one-use subtraction from non-opaque constant:
2685   //   (C - x) + y  ->  (y - x) + C
2686   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2687       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2688     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2689     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2690   }
2691
2692   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2693   // rather than 'add 0/-1' (the zext should get folded).
2694   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2695   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2696       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2697       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2698     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2699     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2700   }
2701
2702   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2703   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2704     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2705     if (TN->getVT() == MVT::i1) {
2706       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2707                                  DAG.getConstant(1, DL, VT));
2708       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2709     }
2710   }
2711
2712   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2713   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2714       N1.getResNo() == 0)
2715     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2716                        N0, N1.getOperand(0), N1.getOperand(2));
2717
2718   // (add X, Carry) -> (addcarry X, 0, Carry)
2719   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2720     if (SDValue Carry = getAsCarry(TLI, N1))
2721       return DAG.getNode(ISD::ADDCARRY, DL,
2722                          DAG.getVTList(VT, Carry.getValueType()), N0,
2723                          DAG.getConstant(0, DL, VT), Carry);
2724
2725   return SDValue();
2726 }
2727
2728 SDValue DAGCombiner::visitADDC(SDNode *N) {
2729   SDValue N0 = N->getOperand(0);
2730   SDValue N1 = N->getOperand(1);
2731   EVT VT = N0.getValueType();
2732   SDLoc DL(N);
2733
2734   // If the flag result is dead, turn this into an ADD.
2735   if (!N->hasAnyUseOfValue(1))
2736     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2737                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2738
2739   // canonicalize constant to RHS.
2740   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2741   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2742   if (N0C && !N1C)
2743     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2744
2745   // fold (addc x, 0) -> x + no carry out
2746   if (isNullConstant(N1))
2747     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2748                                         DL, MVT::Glue));
2749
2750   // If it cannot overflow, transform into an add.
2751   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2752     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2753                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2754
2755   return SDValue();
2756 }
2757
2758 /**
2759  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2760  * then the flip also occurs if computing the inverse is the same cost.
2761  * This function returns an empty SDValue in case it cannot flip the boolean
2762  * without increasing the cost of the computation. If you want to flip a boolean
2763  * no matter what, use DAG.getLogicalNOT.
2764  */
2765 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2766                                   const TargetLowering &TLI,
2767                                   bool Force) {
2768   if (Force && isa<ConstantSDNode>(V))
2769     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2770
2771   if (V.getOpcode() != ISD::XOR)
2772     return SDValue();
2773
2774   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2775   if (!Const)
2776     return SDValue();
2777
2778   EVT VT = V.getValueType();
2779
2780   bool IsFlip = false;
2781   switch(TLI.getBooleanContents(VT)) {
2782     case TargetLowering::ZeroOrOneBooleanContent:
2783       IsFlip = Const->isOne();
2784       break;
2785     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2786       IsFlip = Const->isAllOnesValue();
2787       break;
2788     case TargetLowering::UndefinedBooleanContent:
2789       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2790       break;
2791   }
2792
2793   if (IsFlip)
2794     return V.getOperand(0);
2795   if (Force)
2796     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2797   return SDValue();
2798 }
2799
2800 SDValue DAGCombiner::visitADDO(SDNode *N) {
2801   SDValue N0 = N->getOperand(0);
2802   SDValue N1 = N->getOperand(1);
2803   EVT VT = N0.getValueType();
2804   bool IsSigned = (ISD::SADDO == N->getOpcode());
2805
2806   EVT CarryVT = N->getValueType(1);
2807   SDLoc DL(N);
2808
2809   // If the flag result is dead, turn this into an ADD.
2810   if (!N->hasAnyUseOfValue(1))
2811     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2812                      DAG.getUNDEF(CarryVT));
2813
2814   // canonicalize constant to RHS.
2815   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2816       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2817     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2818
2819   // fold (addo x, 0) -> x + no carry out
2820   if (isNullOrNullSplat(N1))
2821     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2822
2823   if (!IsSigned) {
2824     // If it cannot overflow, transform into an add.
2825     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2826       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2827                        DAG.getConstant(0, DL, CarryVT));
2828
2829     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2830     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2831       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2832                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2833       return CombineTo(
2834           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2835     }
2836
2837     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2838       return Combined;
2839
2840     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2841       return Combined;
2842   }
2843
2844   return SDValue();
2845 }
2846
2847 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2848   EVT VT = N0.getValueType();
2849   if (VT.isVector())
2850     return SDValue();
2851
2852   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2853   // If Y + 1 cannot overflow.
2854   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2855     SDValue Y = N1.getOperand(0);
2856     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2857     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2858       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2859                          N1.getOperand(2));
2860   }
2861
2862   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2863   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2864     if (SDValue Carry = getAsCarry(TLI, N1))
2865       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2866                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2867
2868   return SDValue();
2869 }
2870
2871 SDValue DAGCombiner::visitADDE(SDNode *N) {
2872   SDValue N0 = N->getOperand(0);
2873   SDValue N1 = N->getOperand(1);
2874   SDValue CarryIn = N->getOperand(2);
2875
2876   // canonicalize constant to RHS
2877   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2878   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2879   if (N0C && !N1C)
2880     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2881                        N1, N0, CarryIn);
2882
2883   // fold (adde x, y, false) -> (addc x, y)
2884   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2885     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2886
2887   return SDValue();
2888 }
2889
2890 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2891   SDValue N0 = N->getOperand(0);
2892   SDValue N1 = N->getOperand(1);
2893   SDValue CarryIn = N->getOperand(2);
2894   SDLoc DL(N);
2895
2896   // canonicalize constant to RHS
2897   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2898   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2899   if (N0C && !N1C)
2900     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2901
2902   // fold (addcarry x, y, false) -> (uaddo x, y)
2903   if (isNullConstant(CarryIn)) {
2904     if (!LegalOperations ||
2905         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2906       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2907   }
2908
2909   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2910   if (isNullConstant(N0) && isNullConstant(N1)) {
2911     EVT VT = N0.getValueType();
2912     EVT CarryVT = CarryIn.getValueType();
2913     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2914     AddToWorklist(CarryExt.getNode());
2915     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2916                                     DAG.getConstant(1, DL, VT)),
2917                      DAG.getConstant(0, DL, CarryVT));
2918   }
2919
2920   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2921     return Combined;
2922
2923   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2924     return Combined;
2925
2926   return SDValue();
2927 }
2928
2929 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2930   SDValue N0 = N->getOperand(0);
2931   SDValue N1 = N->getOperand(1);
2932   SDValue CarryIn = N->getOperand(2);
2933   SDLoc DL(N);
2934
2935   // canonicalize constant to RHS
2936   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2937   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2938   if (N0C && !N1C)
2939     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2940
2941   // fold (saddo_carry x, y, false) -> (saddo x, y)
2942   if (isNullConstant(CarryIn)) {
2943     if (!LegalOperations ||
2944         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2945       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2946   }
2947
2948   return SDValue();
2949 }
2950
2951 /**
 * If we are facing some sort of diamond carry propagation pattern try to
2953  * break it up to generate something like:
2954  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2955  *
2956  * The end result is usually an increase in operation required, but because the
 * carry is now linearized, other transforms can kick in and optimize the DAG.
2958  *
2959  * Patterns typically look something like
2960  *            (uaddo A, B)
2961  *             /       \
2962  *          Carry      Sum
2963  *            |          \
2964  *            | (addcarry *, 0, Z)
2965  *            |       /
2966  *             \   Carry
2967  *              |   /
2968  * (addcarry X, *, *)
2969  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2971  * produce a combine with a single path for carry propagation.
2972  */
2973 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2974                                       SDValue X, SDValue Carry0, SDValue Carry1,
2975                                       SDNode *N) {
2976   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2977     return SDValue();
2978   if (Carry1.getOpcode() != ISD::UADDO)
2979     return SDValue();
2980
2981   SDValue Z;
2982
2983   /**
2984    * First look for a suitable Z. It will present itself in the form of
2985    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2986    */
2987   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2988       isNullConstant(Carry0.getOperand(1))) {
2989     Z = Carry0.getOperand(2);
2990   } else if (Carry0.getOpcode() == ISD::UADDO &&
2991              isOneConstant(Carry0.getOperand(1))) {
2992     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2993     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2994   } else {
2995     // We couldn't find a suitable Z.
2996     return SDValue();
2997   }
2998
2999
3000   auto cancelDiamond = [&](SDValue A,SDValue B) {
3001     SDLoc DL(N);
3002     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3003     Combiner.AddToWorklist(NewY.getNode());
3004     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3005                        DAG.getConstant(0, DL, X.getValueType()),
3006                        NewY.getValue(1));
3007   };
3008
3009   /**
3010    *      (uaddo A, B)
3011    *           |
3012    *          Sum
3013    *           |
3014    * (addcarry *, 0, Z)
3015    */
3016   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3017     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3018   }
3019
3020   /**
3021    * (addcarry A, 0, Z)
3022    *         |
3023    *        Sum
3024    *         |
3025    *  (uaddo *, B)
3026    */
3027   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3028     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3029   }
3030
3031   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3032     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3033   }
3034
3035   return SDValue();
3036 }
3037
3038 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3039 // match patterns like:
3040 //
3041 //          (uaddo A, B)            CarryIn
3042 //            |  \                     |
3043 //            |   \                    |
3044 //    PartialSum   PartialCarryOutX   /
3045 //            |        |             /
3046 //            |    ____|____________/
3047 //            |   /    |
3048 //     (uaddo *, *)    \________
3049 //       |  \                   \
3050 //       |   \                   |
3051 //       |    PartialCarryOutY   |
3052 //       |        \              |
3053 //       |         \            /
3054 //   AddCarrySum    |    ______/
3055 //                  |   /
3056 //   CarryOut = (or *, *)
3057 //
3058 // And generate ADDCARRY (or SUBCARRY) with two result values:
3059 //
3060 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3061 //
3062 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3063 // a single path for carry/borrow out propagation:
3064 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3065                                    const TargetLowering &TLI, SDValue Carry0,
3066                                    SDValue Carry1, SDNode *N) {
3067   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3068     return SDValue();
3069   unsigned Opcode = Carry0.getOpcode();
3070   if (Opcode != Carry1.getOpcode())
3071     return SDValue();
3072   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3073     return SDValue();
3074
3075   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3076   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3077   // the above ASCII art.)
3078   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3079       Carry1.getOperand(1) != Carry0.getValue(0))
3080     std::swap(Carry0, Carry1);
3081   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3082       Carry1.getOperand(1) != Carry0.getValue(0))
3083     return SDValue();
3084
3085   // The carry in value must be on the righthand side for subtraction.
3086   unsigned CarryInOperandNum =
3087       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3088   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3089     return SDValue();
3090   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3091
3092   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3093   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3094     return SDValue();
3095
3096   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3097   // TODO: make getAsCarry() aware of how partial carries are merged.
3098   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3099     return SDValue();
3100   CarryIn = CarryIn.getOperand(0);
3101   if (CarryIn.getValueType() != MVT::i1)
3102     return SDValue();
3103
3104   SDLoc DL(N);
3105   SDValue Merged =
3106       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3107                   Carry0.getOperand(1), CarryIn);
3108
3109   // Please note that because we have proven that the result of the UADDO/USUBO
3110   // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3111   // therefore prove that if the first UADDO/USUBO overflows, the second
3112   // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3113   // maximum value.
3114   //
3115   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3116   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3117   //
3118   // This is important because it means that OR and XOR can be used to merge
3119   // carry flags; and that AND can return a constant zero.
3120   //
3121   // TODO: match other operations that can merge flags (ADD, etc)
3122   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3123   if (N->getOpcode() == ISD::AND)
3124     return DAG.getConstant(0, DL, MVT::i1);
3125   return Merged.getValue(1);
3126 }
3127
3128 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3129                                        SDNode *N) {
3130   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3131   if (isBitwiseNot(N0))
3132     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3133       SDLoc DL(N);
3134       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3135                                 N0.getOperand(0), NotC);
3136       return CombineTo(
3137           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3138     }
3139
3140   // Iff the flag result is dead:
3141   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3142   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3143   // or the dependency between the instructions.
3144   if ((N0.getOpcode() == ISD::ADD ||
3145        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3146         N0.getValue(1) != CarryIn)) &&
3147       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3148     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3149                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3150
3151   /**
3152    * When one of the addcarry argument is itself a carry, we may be facing
3153    * a diamond carry propagation. In which case we try to transform the DAG
3154    * to ensure linear carry propagation if that is possible.
3155    */
3156   if (auto Y = getAsCarry(TLI, N1)) {
3157     // Because both are carries, Y and Z can be swapped.
3158     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3159       return R;
3160     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3161       return R;
3162   }
3163
3164   return SDValue();
3165 }
3166
3167 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3168 // clamp/truncation if necessary.
3169 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3170                                    SDValue RHS, SelectionDAG &DAG,
3171                                    const SDLoc &DL) {
3172   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3173          "Illegal truncation");
3174
3175   if (DstVT == SrcVT)
3176     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3177
3178   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3179   // clamping RHS.
3180   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3181                                           DstVT.getScalarSizeInBits());
3182   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3183     return SDValue();
3184
3185   SDValue SatLimit =
3186       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3187                                            DstVT.getScalarSizeInBits()),
3188                       DL, SrcVT);
3189   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3190   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3191   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3192   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3193 }
3194
3195 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3196 // usubsat(a,b), optionally as a truncated type.
3197 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3198   if (N->getOpcode() != ISD::SUB ||
3199       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3200     return SDValue();
3201
3202   EVT SubVT = N->getValueType(0);
3203   SDValue Op0 = N->getOperand(0);
3204   SDValue Op1 = N->getOperand(1);
3205
3206   // Try to find umax(a,b) - b or a - umin(a,b) patterns
3207   // they may be converted to usubsat(a,b).
3208   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3209     SDValue MaxLHS = Op0.getOperand(0);
3210     SDValue MaxRHS = Op0.getOperand(1);
3211     if (MaxLHS == Op1)
3212       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3213     if (MaxRHS == Op1)
3214       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3215   }
3216
3217   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3218     SDValue MinLHS = Op1.getOperand(0);
3219     SDValue MinRHS = Op1.getOperand(1);
3220     if (MinLHS == Op0)
3221       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3222     if (MinRHS == Op0)
3223       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3224   }
3225
3226   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3227   if (Op1.getOpcode() == ISD::TRUNCATE &&
3228       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3229       Op1.getOperand(0).hasOneUse()) {
3230     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3231     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3232     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3233       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3234                                  DAG, SDLoc(N));
3235     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3236       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3237                                  DAG, SDLoc(N));
3238   }
3239
3240   return SDValue();
3241 }
3242
3243 // Since it may not be valid to emit a fold to zero for vector initializers
3244 // check if we can before folding.
3245 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3246                              SelectionDAG &DAG, bool LegalOperations) {
3247   if (!VT.isVector())
3248     return DAG.getConstant(0, DL, VT);
3249   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3250     return DAG.getConstant(0, DL, VT);
3251   return SDValue();
3252 }
3253
3254 SDValue DAGCombiner::visitSUB(SDNode *N) {
3255   SDValue N0 = N->getOperand(0);
3256   SDValue N1 = N->getOperand(1);
3257   EVT VT = N0.getValueType();
3258   SDLoc DL(N);
3259
3260   // fold vector ops
3261   if (VT.isVector()) {
3262     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3263       return FoldedVOp;
3264
3265     // fold (sub x, 0) -> x, vector edition
3266     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3267       return N0;
3268   }
3269
3270   // fold (sub x, x) -> 0
3271   // FIXME: Refactor this and xor and other similar operations together.
3272   if (N0 == N1)
3273     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3274
3275   // fold (sub c1, c2) -> c3
3276   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3277     return C;
3278
3279   if (SDValue NewSel = foldBinOpIntoSelect(N))
3280     return NewSel;
3281
3282   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3283
3284   // fold (sub x, c) -> (add x, -c)
3285   if (N1C) {
3286     return DAG.getNode(ISD::ADD, DL, VT, N0,
3287                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3288   }
3289
3290   if (isNullOrNullSplat(N0)) {
3291     unsigned BitWidth = VT.getScalarSizeInBits();
3292     // Right-shifting everything out but the sign bit followed by negation is
3293     // the same as flipping arithmetic/logical shift type without the negation:
3294     // -(X >>u 31) -> (X >>s 31)
3295     // -(X >>s 31) -> (X >>u 31)
3296     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3297       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3298       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3299         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3300         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3301           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3302       }
3303     }
3304
3305     // 0 - X --> 0 if the sub is NUW.
3306     if (N->getFlags().hasNoUnsignedWrap())
3307       return N0;
3308
3309     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3310       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3311       // N1 must be 0 because negating the minimum signed value is undefined.
3312       if (N->getFlags().hasNoSignedWrap())
3313         return N0;
3314
3315       // 0 - X --> X if X is 0 or the minimum signed value.
3316       return N1;
3317     }
3318
3319     // Convert 0 - abs(x).
3320     SDValue Result;
3321     if (N1->getOpcode() == ISD::ABS &&
3322         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3323         TLI.expandABS(N1.getNode(), Result, DAG, true))
3324       return Result;
3325
3326     // Fold neg(splat(neg(x)) -> splat(x)
3327     if (VT.isVector()) {
3328       SDValue N1S = DAG.getSplatValue(N1, true);
3329       if (N1S && N1S.getOpcode() == ISD::SUB &&
3330           isNullConstant(N1S.getOperand(0))) {
3331         if (VT.isScalableVector())
3332           return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3333         return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3334       }
3335     }
3336   }
3337
3338   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3339   if (isAllOnesOrAllOnesSplat(N0))
3340     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3341
3342   // fold (A - (0-B)) -> A+B
3343   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3344     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3345
3346   // fold A-(A-B) -> B
3347   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3348     return N1.getOperand(1);
3349
3350   // fold (A+B)-A -> B
3351   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3352     return N0.getOperand(1);
3353
3354   // fold (A+B)-B -> A
3355   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3356     return N0.getOperand(0);
3357
3358   // fold (A+C1)-C2 -> A+(C1-C2)
3359   if (N0.getOpcode() == ISD::ADD &&
3360       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3361       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3362     SDValue NewC =
3363         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3364     assert(NewC && "Constant folding failed");
3365     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3366   }
3367
3368   // fold C2-(A+C1) -> (C2-C1)-A
3369   if (N1.getOpcode() == ISD::ADD) {
3370     SDValue N11 = N1.getOperand(1);
3371     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3372         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3373       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3374       assert(NewC && "Constant folding failed");
3375       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3376     }
3377   }
3378
3379   // fold (A-C1)-C2 -> A-(C1+C2)
3380   if (N0.getOpcode() == ISD::SUB &&
3381       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3382       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3383     SDValue NewC =
3384         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3385     assert(NewC && "Constant folding failed");
3386     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3387   }
3388
3389   // fold (c1-A)-c2 -> (c1-c2)-A
3390   if (N0.getOpcode() == ISD::SUB &&
3391       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3392       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3393     SDValue NewC =
3394         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3395     assert(NewC && "Constant folding failed");
3396     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3397   }
3398
3399   // fold ((A+(B+or-C))-B) -> A+or-C
3400   if (N0.getOpcode() == ISD::ADD &&
3401       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3402        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3403       N0.getOperand(1).getOperand(0) == N1)
3404     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3405                        N0.getOperand(1).getOperand(1));
3406
3407   // fold ((A+(C+B))-B) -> A+C
3408   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3409       N0.getOperand(1).getOperand(1) == N1)
3410     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3411                        N0.getOperand(1).getOperand(0));
3412
3413   // fold ((A-(B-C))-C) -> A-B
3414   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3415       N0.getOperand(1).getOperand(1) == N1)
3416     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3417                        N0.getOperand(1).getOperand(0));
3418
3419   // fold (A-(B-C)) -> A+(C-B)
3420   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3421     return DAG.getNode(ISD::ADD, DL, VT, N0,
3422                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3423                                    N1.getOperand(0)));
3424
3425   // A - (A & B)  ->  A & (~B)
3426   if (N1.getOpcode() == ISD::AND) {
3427     SDValue A = N1.getOperand(0);
3428     SDValue B = N1.getOperand(1);
3429     if (A != N0)
3430       std::swap(A, B);
3431     if (A == N0 &&
3432         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3433       SDValue InvB =
3434           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3435       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3436     }
3437   }
3438
3439   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3440   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3441     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3442         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3443       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3444                                 N1.getOperand(0).getOperand(1),
3445                                 N1.getOperand(1));
3446       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3447     }
3448     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3449         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3450       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3451                                 N1.getOperand(0),
3452                                 N1.getOperand(1).getOperand(1));
3453       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3454     }
3455   }
3456
3457   // If either operand of a sub is undef, the result is undef
3458   if (N0.isUndef())
3459     return N0;
3460   if (N1.isUndef())
3461     return N1;
3462
3463   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3464     return V;
3465
3466   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3467     return V;
3468
3469   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3470     return V;
3471
3472   if (SDValue V = foldSubToUSubSat(VT, N))
3473     return V;
3474
3475   // (x - y) - 1  ->  add (xor y, -1), x
3476   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3477     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3478                               DAG.getAllOnesConstant(DL, VT));
3479     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3480   }
3481
3482   // Look for:
3483   //   sub y, (xor x, -1)
3484   // And if the target does not like this form then turn into:
3485   //   add (add x, y), 1
3486   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3487     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3488     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3489   }
3490
3491   // Hoist one-use addition by non-opaque constant:
3492   //   (x + C) - y  ->  (x - y) + C
3493   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3494       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3495     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3496     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3497   }
3498   // y - (x + C)  ->  (y - x) - C
3499   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3500       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3501     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3502     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3503   }
3504   // (x - C) - y  ->  (x - y) - C
3505   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3506   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3507       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3508     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3509     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3510   }
3511   // (C - x) - y  ->  C - (x + y)
3512   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3513       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3514     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3515     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3516   }
3517
3518   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3519   // rather than 'sub 0/1' (the sext should get folded).
3520   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3521   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3522       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3523       TLI.getBooleanContents(VT) ==
3524           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3525     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3526     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3527   }
3528
3529   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3530   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3531     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3532       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3533       SDValue S0 = N1.getOperand(0);
3534       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3535         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3536           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3537             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3538     }
3539   }
3540
3541   // If the relocation model supports it, consider symbol offsets.
3542   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3543     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3544       // fold (sub Sym, c) -> Sym-c
3545       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3546         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3547                                     GA->getOffset() -
3548                                         (uint64_t)N1C->getSExtValue());
3549       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3550       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3551         if (GA->getGlobal() == GB->getGlobal())
3552           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3553                                  DL, VT);
3554     }
3555
3556   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3557   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3558     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3559     if (TN->getVT() == MVT::i1) {
3560       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3561                                  DAG.getConstant(1, DL, VT));
3562       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3563     }
3564   }
3565
3566   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3567   if (N1.getOpcode() == ISD::VSCALE) {
3568     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3569     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3570   }
3571
3572   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3573   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3574     APInt NewStep = -N1.getConstantOperandAPInt(0);
3575     return DAG.getNode(ISD::ADD, DL, VT, N0,
3576                        DAG.getStepVector(DL, VT, NewStep));
3577   }
3578
3579   // Prefer an add for more folding potential and possibly better codegen:
3580   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3581   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3582     SDValue ShAmt = N1.getOperand(1);
3583     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3584     if (ShAmtC &&
3585         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3586       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3587       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3588     }
3589   }
3590
3591   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3592     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3593     if (SDValue Carry = getAsCarry(TLI, N0)) {
3594       SDValue X = N1;
3595       SDValue Zero = DAG.getConstant(0, DL, VT);
3596       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3597       return DAG.getNode(ISD::ADDCARRY, DL,
3598                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3599                          Carry);
3600     }
3601   }
3602
3603   return SDValue();
3604 }
3605
3606 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3607   SDValue N0 = N->getOperand(0);
3608   SDValue N1 = N->getOperand(1);
3609   EVT VT = N0.getValueType();
3610   SDLoc DL(N);
3611
3612   // fold vector ops
3613   if (VT.isVector()) {
3614     // TODO SimplifyVBinOp
3615
3616     // fold (sub_sat x, 0) -> x, vector edition
3617     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3618       return N0;
3619   }
3620
3621   // fold (sub_sat x, undef) -> 0
3622   if (N0.isUndef() || N1.isUndef())
3623     return DAG.getConstant(0, DL, VT);
3624
3625   // fold (sub_sat x, x) -> 0
3626   if (N0 == N1)
3627     return DAG.getConstant(0, DL, VT);
3628
3629   // fold (sub_sat c1, c2) -> c3
3630   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3631     return C;
3632
3633   // fold (sub_sat x, 0) -> x
3634   if (isNullConstant(N1))
3635     return N0;
3636
3637   return SDValue();
3638 }
3639
3640 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3641   SDValue N0 = N->getOperand(0);
3642   SDValue N1 = N->getOperand(1);
3643   EVT VT = N0.getValueType();
3644   SDLoc DL(N);
3645
3646   // If the flag result is dead, turn this into an SUB.
3647   if (!N->hasAnyUseOfValue(1))
3648     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3649                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3650
3651   // fold (subc x, x) -> 0 + no borrow
3652   if (N0 == N1)
3653     return CombineTo(N, DAG.getConstant(0, DL, VT),
3654                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3655
3656   // fold (subc x, 0) -> x + no borrow
3657   if (isNullConstant(N1))
3658     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3659
3660   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3661   if (isAllOnesConstant(N0))
3662     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3663                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3664
3665   return SDValue();
3666 }
3667
3668 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3669   SDValue N0 = N->getOperand(0);
3670   SDValue N1 = N->getOperand(1);
3671   EVT VT = N0.getValueType();
3672   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3673
3674   EVT CarryVT = N->getValueType(1);
3675   SDLoc DL(N);
3676
3677   // If the flag result is dead, turn this into an SUB.
3678   if (!N->hasAnyUseOfValue(1))
3679     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3680                      DAG.getUNDEF(CarryVT));
3681
3682   // fold (subo x, x) -> 0 + no borrow
3683   if (N0 == N1)
3684     return CombineTo(N, DAG.getConstant(0, DL, VT),
3685                      DAG.getConstant(0, DL, CarryVT));
3686
3687   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3688
3689   // fold (subox, c) -> (addo x, -c)
3690   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3691     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3692                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3693   }
3694
3695   // fold (subo x, 0) -> x + no borrow
3696   if (isNullOrNullSplat(N1))
3697     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3698
3699   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3700   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3701     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3702                      DAG.getConstant(0, DL, CarryVT));
3703
3704   return SDValue();
3705 }
3706
3707 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3708   SDValue N0 = N->getOperand(0);
3709   SDValue N1 = N->getOperand(1);
3710   SDValue CarryIn = N->getOperand(2);
3711
3712   // fold (sube x, y, false) -> (subc x, y)
3713   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3714     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3715
3716   return SDValue();
3717 }
3718
3719 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3720   SDValue N0 = N->getOperand(0);
3721   SDValue N1 = N->getOperand(1);
3722   SDValue CarryIn = N->getOperand(2);
3723
3724   // fold (subcarry x, y, false) -> (usubo x, y)
3725   if (isNullConstant(CarryIn)) {
3726     if (!LegalOperations ||
3727         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3728       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3729   }
3730
3731   return SDValue();
3732 }
3733
3734 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3735   SDValue N0 = N->getOperand(0);
3736   SDValue N1 = N->getOperand(1);
3737   SDValue CarryIn = N->getOperand(2);
3738
3739   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3740   if (isNullConstant(CarryIn)) {
3741     if (!LegalOperations ||
3742         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3743       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3744   }
3745
3746   return SDValue();
3747 }
3748
3749 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3750 // UMULFIXSAT here.
3751 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3752   SDValue N0 = N->getOperand(0);
3753   SDValue N1 = N->getOperand(1);
3754   SDValue Scale = N->getOperand(2);
3755   EVT VT = N0.getValueType();
3756
3757   // fold (mulfix x, undef, scale) -> 0
3758   if (N0.isUndef() || N1.isUndef())
3759     return DAG.getConstant(0, SDLoc(N), VT);
3760
3761   // Canonicalize constant to RHS (vector doesn't have to splat)
3762   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3763      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3764     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3765
3766   // fold (mulfix x, 0, scale) -> 0
3767   if (isNullConstant(N1))
3768     return DAG.getConstant(0, SDLoc(N), VT);
3769
3770   return SDValue();
3771 }
3772
3773 SDValue DAGCombiner::visitMUL(SDNode *N) {
       // Combine an ISD::MUL node. The folds below are attempted in order and
       // the first one that applies is returned; an empty SDValue is returned
       // when none of them does.
3774   SDValue N0 = N->getOperand(0);
3775   SDValue N1 = N->getOperand(1);
3776   EVT VT = N0.getValueType();
3777
3778   // fold (mul x, undef) -> 0
3779   if (N0.isUndef() || N1.isUndef())
3780     return DAG.getConstant(0, SDLoc(N), VT);
3781
       // Description of a constant RHS. For vectors only a constant splat counts
       // and ConstValue1 receives the splat value; for scalars any
       // ConstantSDNode counts and N1IsOpaqueConst records whether it is opaque.
3782   bool N1IsConst = false;
3783   bool N1IsOpaqueConst = false;
3784   APInt ConstValue1;
3785
3786   // fold vector ops
3787   if (VT.isVector()) {
3788     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3789       return FoldedVOp;
3790
3791     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3792     assert((!N1IsConst ||
3793             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3794            "Splat APInt should be element width");
3795   } else {
3796     N1IsConst = isa<ConstantSDNode>(N1);
3797     if (N1IsConst) {
3798       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3799       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3800     }
3801   }
3802
3803   // fold (mul c1, c2) -> c1*c2
3804   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3805     return C;
3806
3807   // canonicalize constant to RHS (vector doesn't have to splat)
3808   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3809      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3810     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3811
3812   // fold (mul x, 0) -> 0
3813   if (N1IsConst && ConstValue1.isNullValue())
3814     return N1;
3815
3816   // fold (mul x, 1) -> x
3817   if (N1IsConst && ConstValue1.isOneValue())
3818     return N0;
3819
3820   if (SDValue NewSel = foldBinOpIntoSelect(N))
3821     return NewSel;
3822
3823   // fold (mul x, -1) -> 0-x
3824   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3825     SDLoc DL(N);
3826     return DAG.getNode(ISD::SUB, DL, VT,
3827                        DAG.getConstant(0, DL, VT), N0);
3828   }
3829
3830   // fold (mul x, (1 << c)) -> x << c
       // For vector types this is only done while Level is no later than
       // AfterLegalizeVectorOps.
3831   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3832       DAG.isKnownToBeAPowerOfTwo(N1) &&
3833       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3834     SDLoc DL(N);
3835     SDValue LogBase2 = BuildLogBase2(N1, DL);
3836     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3837     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3838     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3839   }
3840
3841   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3842   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3843     unsigned Log2Val = (-ConstValue1).logBase2();
3844     SDLoc DL(N);
3845     // FIXME: If the input is something that is easily negated (e.g. a
3846     // single-use add), we should put the negate there.
3847     return DAG.getNode(ISD::SUB, DL, VT,
3848                        DAG.getConstant(0, DL, VT),
3849                        DAG.getNode(ISD::SHL, DL, VT, N0,
3850                             DAG.getConstant(Log2Val, DL,
3851                                       getShiftAmountTy(N0.getValueType()))));
3852   }
3853
3854   // Try to transform:
3855   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3856   // mul x, (2^N + 1) --> add (shl x, N), x
3857   // mul x, (2^N - 1) --> sub (shl x, N), x
3858   // Examples: x * 33 --> (x << 5) + x
3859   //           x * 15 --> (x << 4) - x
3860   //           x * -33 --> -((x << 5) + x)
3861   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3862   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3863   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3864   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3865   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3866   //           x * 0xf800 --> (x << 16) - (x << 11)
3867   //           x * -0x8800 --> -((x << 15) + (x << 11))
3868   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
       // The target decides, via decomposeMulByConstant, whether this is wanted.
3869   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3870     // TODO: We could handle more general decomposition of any constant by
3871     //       having the target set a limit on number of ops and making a
3872     //       callback to determine that sequence (similar to sqrt expansion).
         // MathOp stays DELETED_NODE when the constant has neither shape.
3873     unsigned MathOp = ISD::DELETED_NODE;
3874     APInt MulC = ConstValue1.abs();
3875     // The constant `2` should be treated as (2^0 + 1).
         // TZeros is the common power-of-two factor stripped from MulC; it is
         // re-applied below as an extra shift on both terms.
3876     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3877     MulC.lshrInPlace(TZeros);
3878     if ((MulC - 1).isPowerOf2())
3879       MathOp = ISD::ADD;
3880     else if ((MulC + 1).isPowerOf2())
3881       MathOp = ISD::SUB;
3882
3883     if (MathOp != ISD::DELETED_NODE) {
3884       unsigned ShAmt =
3885           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3886       ShAmt += TZeros;
3887       assert(ShAmt < VT.getScalarSizeInBits() &&
3888              "multiply-by-constant generated out of bounds shift");
3889       SDLoc DL(N);
3890       SDValue Shl =
3891           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
           // With no stripped factor the second term is x itself; otherwise it
           // is x << TZeros.
3892       SDValue R =
3893           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3894                                DAG.getNode(ISD::SHL, DL, VT, N0,
3895                                            DAG.getConstant(TZeros, DL, VT)))
3896                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
           // The decomposition used |C|; negate the result for a negative C.
3897       if (ConstValue1.isNegative())
3898         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3899       return R;
3900     }
3901   }
3902
3903   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3904   if (N0.getOpcode() == ISD::SHL &&
3905       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3906       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3907     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
         // Only commit if the shifted constant is itself a constant.
3908     if (isConstantOrConstantVector(C3))
3909       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3910   }
3911
3912   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3913   // use.
3914   {
         // Sh is the matched single-use shift (if any) and Y the other operand.
3915     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3916
3917     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3918     if (N0.getOpcode() == ISD::SHL &&
3919         isConstantOrConstantVector(N0.getOperand(1)) &&
3920         N0.getNode()->hasOneUse()) {
3921       Sh = N0; Y = N1;
3922     } else if (N1.getOpcode() == ISD::SHL &&
3923                isConstantOrConstantVector(N1.getOperand(1)) &&
3924                N1.getNode()->hasOneUse()) {
3925       Sh = N1; Y = N0;
3926     }
3927
3928     if (Sh.getNode()) {
3929       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3930       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3931     }
3932   }
3933
3934   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
       // Only done when isMulAddWithConstProfitable accepts the pattern.
3935   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3936       N0.getOpcode() == ISD::ADD &&
3937       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3938       isMulAddWithConstProfitable(N, N0, N1))
3939       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3940                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3941                                      N0.getOperand(0), N1),
3942                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3943                                      N0.getOperand(1), N1));
3944
3945   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3946   if (N0.getOpcode() == ISD::VSCALE)
3947     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3948       const APInt &C0 = N0.getConstantOperandAPInt(0);
3949       const APInt &C1 = NC1->getAPIntValue();
3950       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3951     }
3952
3953   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3954   APInt MulVal;
3955   if (N0.getOpcode() == ISD::STEP_VECTOR)
3956     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3957       const APInt &C0 = N0.getConstantOperandAPInt(0);
3958       APInt NewStep = C0 * MulVal;
3959       return DAG.getStepVector(SDLoc(N), VT, NewStep);
3960     }
3961
3962   // Fold a per-element mix of (mul x, 0/undef) -> 0 and (mul x, 1) -> x
3963   // into a single masking operation:
3964   //   (mul x, <0/undef/1 ...>) -> (and x, mask)
3965   // We can replace vectors with '0' and '1' factors with a clearing mask.
3966   if (VT.isFixedLengthVector()) {
3967     unsigned NumElts = VT.getVectorNumElements();
3968     SmallBitVector ClearMask;
3969     ClearMask.reserve(NumElts);
         // Accepts a factor of 0 or 1 and records, per visited element, whether
         // the lane must be cleared. A null V is treated like zero (presumably
         // the undef lanes admitted by AllowUndefs below).
3970     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3971       if (!V || V->isNullValue()) {
3972         ClearMask.push_back(true);
3973         return true;
3974       }
3975       ClearMask.push_back(false);
3976       return V->isOne();
3977     };
3978     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3979         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3980       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3981       SDLoc DL(N);
           // Build the mask from N1's own operand type.
3982       EVT LegalSVT = N1.getOperand(0).getValueType();
3983       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3984       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
           // Start from all-ones (keep) and zero out the lanes marked for clearing.
3985       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3986       for (unsigned I = 0; I != NumElts; ++I)
3987         if (ClearMask[I])
3988           Mask[I] = Zero;
3989       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3990     }
3991   }
3992
3993   // reassociate mul
3994   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3995     return RMUL;
3996
3997   return SDValue();
3998 }
3999
4000 /// Return true if divmod libcall is available.
4001 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4002                                      const TargetLowering &TLI) {
4003   RTLIB::Libcall LC;
4004   EVT NodeType = Node->getValueType(0);
4005   if (!NodeType.isSimple())
4006     return false;
4007   switch (NodeType.getSimpleVT().SimpleTy) {
4008   default: return false; // No libcall for vector types.
4009   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4010   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4011   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4012   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4013   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4014   }
4015
4016   return TLI.getLibcallName(LC) != nullptr;
4017 }
4018
4019 /// Issue divrem if both quotient and remainder are needed.
     ///
     /// \p Node is a div or rem node. The other users of its first operand are
     /// scanned for div/rem/divrem nodes with the same two operands; when the
     /// complementary operation (or an existing divrem) is found, those users are
     /// replaced with the results of a single [SU]DIVREM node (result 0 for
     /// divisions, result 1 for remainders). \p Node itself is not replaced
     /// here.
     ///
     /// \returns the DIVREM value used for the replacements, or an empty SDValue
     /// if no combination was made.
4020 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4021   if (Node->use_empty())
4022     return SDValue(); // This is a dead node, leave it alone.
4023
4024   unsigned Opcode = Node->getOpcode();
4025   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4026   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4027
4028   // DivMod lib calls can still work on non-legal types if using lib-calls.
4029   EVT VT = Node->getValueType(0);
4030   if (VT.isVector() || !VT.isInteger())
4031     return SDValue();
4032
       // An illegal type is only acceptable if the target custom-lowers DIVREM
       // for it.
4033   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4034     return SDValue();
4035
4036   // If DIVREM is going to get expanded into a libcall,
4037   // but there is no libcall available, then don't combine.
4038   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4039       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4040     return SDValue();
4041
4042   // If div is legal, it's better to do the normal expansion
       // OtherOpcode is the complementary operation: the remainder when Node is
       // a division, and the division when Node is a remainder.
4043   unsigned OtherOpcode = 0;
4044   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4045     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4046     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4047       return SDValue();
4048   } else {
4049     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4050     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4051       return SDValue();
4052   }
4053
4054   SDValue Op0 = Node->getOperand(0);
4055   SDValue Op1 = Node->getOperand(1);
       // Created lazily: stays empty until a complementary op or an existing
       // DIVREM on the same operands has been seen.
4056   SDValue combined;
4057   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4058          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4059     SDNode *User = *UI;
         // Skip Node itself as well as deleted or dead users.
4060     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4061         User->use_empty())
4062       continue;
4063     // Convert the other matching node(s), too;
4064     // otherwise, the DIVREM may get target-legalized into something
4065     // target-specific that we won't be able to recognize.
4066     unsigned UserOpc = User->getOpcode();
4067     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4068         User->getOperand(0) == Op0 &&
4069         User->getOperand(1) == Op1) {
4070       if (!combined) {
4071         if (UserOpc == OtherOpcode) {
               // Both quotient and remainder are wanted: build the DIVREM node.
4072           SDVTList VTs = DAG.getVTList(VT, VT);
4073           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4074         } else if (UserOpc == DivRemOpc) {
               // Reuse a DIVREM that already exists on these operands.
4075           combined = SDValue(User, 0);
4076         } else {
               // Another node with Node's own opcode does not justify creating
               // a DIVREM, so leave it alone for now.
4077           assert(UserOpc == Opcode);
4078           continue;
4079         }
4080       }
           // Divisions take result 0 of the DIVREM, remainders take result 1.
4081       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4082         CombineTo(User, combined);
4083       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4084         CombineTo(User, combined.getValue(1));
4085     }
4086   }
4087   return combined;
4088 }
4089
4090 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4091   SDValue N0 = N->getOperand(0);
4092   SDValue N1 = N->getOperand(1);
4093   EVT VT = N->getValueType(0);
4094   SDLoc DL(N);
4095
4096   unsigned Opc = N->getOpcode();
4097   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4098   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4099
4100   // X / undef -> undef
4101   // X % undef -> undef
4102   // X / 0 -> undef
4103   // X % 0 -> undef
4104   // NOTE: This includes vectors where any divisor element is zero/undef.
4105   if (DAG.isUndef(Opc, {N0, N1}))
4106     return DAG.getUNDEF(VT);
4107
4108   // undef / X -> 0
4109   // undef % X -> 0
4110   if (N0.isUndef())
4111     return DAG.getConstant(0, DL, VT);
4112
4113   // 0 / X -> 0
4114   // 0 % X -> 0
4115   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4116   if (N0C && N0C->isNullValue())
4117     return N0;
4118
4119   // X / X -> 1
4120   // X % X -> 0
4121   if (N0 == N1)
4122     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4123
4124   // X / 1 -> X
4125   // X % 1 -> 0
4126   // If this is a boolean op (single-bit element type), we can't have
4127   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4128   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4129   // it's a 1.
4130   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4131     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4132
4133   return SDValue();
4134 }
4135
4136 SDValue DAGCombiner::visitSDIV(SDNode *N) {
       // Combine an ISD::SDIV node. The folds below are attempted in order; an
       // empty SDValue is returned when none applies.
4137   SDValue N0 = N->getOperand(0);
4138   SDValue N1 = N->getOperand(1);
4139   EVT VT = N->getValueType(0);
       // Result type of the setcc nodes built for VT operands below.
4140   EVT CCVT = getSetCCResultType(VT);
4141
4142   // fold vector ops
4143   if (VT.isVector())
4144     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4145       return FoldedVOp;
4146
4147   SDLoc DL(N);
4148
4149   // fold (sdiv c1, c2) -> c1/c2
       // N1C is the constant (or constant splat) divisor, if there is one.
4150   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4151   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4152     return C;
4153
4154   // fold (sdiv X, -1) -> 0-X
4155   if (N1C && N1C->isAllOnesValue())
4156     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4157
4158   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4159   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4160     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4161                          DAG.getConstant(1, DL, VT),
4162                          DAG.getConstant(0, DL, VT));
4163
       // Simplifications shared with the other div/rem opcodes.
4164   if (SDValue V = simplifyDivRem(N, DAG))
4165     return V;
4166
4167   if (SDValue NewSel = foldBinOpIntoSelect(N))
4168     return NewSel;
4169
4170   // If we know the sign bits of both operands are zero, strength reduce to a
4171   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
4172   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4173     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4174
4175   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4176     // If the corresponding remainder node exists, update its users with
4177     // (Dividend - (Quotient * Divisor)).
4178     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4179                                               { N0, N1 })) {
4180       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4181       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4182       AddToWorklist(Mul.getNode());
4183       AddToWorklist(Sub.getNode());
4184       CombineTo(RemNode, Sub);
4185     }
4186     return V;
4187   }
4188
4189   // sdiv, srem -> sdivrem
4190   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4191   // true.  Otherwise, we break the simplification logic in visitREM().
4192   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4193   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4194     if (SDValue DivRem = useDivRem(N))
4195         return DivRem;
4196
4197   return SDValue();
4198 }
4199
4200 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
       // Try to build a cheaper replacement for the signed division N0 / N1.
       // N supplies the debug location, result type and flags; the operands
       // are passed separately. Returns an empty SDValue if nothing applies.
4201   SDLoc DL(N);
4202   EVT VT = N->getValueType(0);
4203   EVT CCVT = getSetCCResultType(VT);
4204   unsigned BitWidth = VT.getScalarSizeInBits();
4205
4206   // Helper for determining whether a value is a power-2 constant scalar or a
4207   // vector of such elements.
       // Negated powers of two are accepted as well; zero and opaque constants
       // are rejected.
4208   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4209     if (C->isNullValue() || C->isOpaque())
4210       return false;
4211     if (C->getAPIntValue().isPowerOf2())
4212       return true;
4213     if ((-C->getAPIntValue()).isPowerOf2())
4214       return true;
4215     return false;
4216   };
4217
4218   // fold (sdiv X, pow2) -> simple ops after legalize
4219   // FIXME: We check for the exact bit here because the generic lowering gives
4220   // better results in that case. The target-specific lowering should learn how
4221   // to handle exact sdivs efficiently.
4222   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4223     // Target-specific implementation of sdiv x, pow2.
4224     if (SDValue Res = BuildSDIVPow2(N))
4225       return Res;
4226
4227     // Create constants that are functions of the shift amount value.
         // C1 = cttz(N1) and Inexact = BitWidth - C1, both in the shift-amount
         // type. Give up if Inexact is not a constant.
4228     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4229     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4230     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4231     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4232     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4233     if (!isConstantOrConstantVector(Inexact))
4234       return SDValue();
4235
4236     // Splat the sign bit into the register
4237     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4238                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4239     AddToWorklist(Sign.getNode());
4240
4241     // Add (N0 < 0) ? abs2 - 1 : 0;
         // The logical shift of the splatted sign by Inexact leaves the low C1
         // bits set for a negative dividend and zero otherwise; the sum is then
         // shifted right arithmetically by C1.
4242     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4243     AddToWorklist(Srl.getNode());
4244     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4245     AddToWorklist(Add.getNode());
4246     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4247     AddToWorklist(Sra.getNode());
4248
4249     // Special case: (sdiv X, 1) -> X
4250     // Special Case: (sdiv X, -1) -> 0-X
         // For a divisor of 1 or -1 the dividend itself is selected here; the
         // negation for -1 is applied by the final select below.
4251     SDValue One = DAG.getConstant(1, DL, VT);
4252     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4253     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4254     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4255     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4256     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4257
4258     // If dividing by a positive value, we're done. Otherwise, the result must
4259     // be negated.
4260     SDValue Zero = DAG.getConstant(0, DL, VT);
4261     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4262
4263     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4264     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4265     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4266     return Res;
4267   }
4268
4269   // If integer divide is expensive and we satisfy the requirements, emit an
4270   // alternate sequence.  Targets may check function attributes for size/speed
4271   // trade-offs.
4272   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4273   if (isConstantOrConstantVector(N1) &&
4274       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4275     if (SDValue Op = BuildSDIV(N))
4276       return Op;
4277
4278   return SDValue();
4279 }
4280
4281 SDValue DAGCombiner::visitUDIV(SDNode *N) {
       // Combine an ISD::UDIV node. The folds below are attempted in order; an
       // empty SDValue is returned when none applies.
4282   SDValue N0 = N->getOperand(0);
4283   SDValue N1 = N->getOperand(1);
4284   EVT VT = N->getValueType(0);
       // Result type of the setcc node built for VT operands below.
4285   EVT CCVT = getSetCCResultType(VT);
4286
4287   // fold vector ops
4288   if (VT.isVector())
4289     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4290       return FoldedVOp;
4291
4292   SDLoc DL(N);
4293
4294   // fold (udiv c1, c2) -> c1/c2
       // N1C is the constant (or constant splat) divisor, if there is one.
4295   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4296   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4297     return C;
4298
4299   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4300   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4301     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4302                          DAG.getConstant(1, DL, VT),
4303                          DAG.getConstant(0, DL, VT));
4304
       // Simplifications shared with the other div/rem opcodes.
4305   if (SDValue V = simplifyDivRem(N, DAG))
4306     return V;
4307
4308   if (SDValue NewSel = foldBinOpIntoSelect(N))
4309     return NewSel;
4310
4311   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4312     // If the corresponding remainder node exists, update its users with
4313     // (Dividend - (Quotient * Divisor)).
4314     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4315                                               { N0, N1 })) {
4316       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4317       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4318       AddToWorklist(Mul.getNode());
4319       AddToWorklist(Sub.getNode());
4320       CombineTo(RemNode, Sub);
4321     }
4322     return V;
4323   }
4324
4325   // udiv, urem -> udivrem
4326   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4327   // true.  Otherwise, we break the simplification logic in visitREM().
4328   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4329   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4330     if (SDValue DivRem = useDivRem(N))
4331         return DivRem;
4332
4333   return SDValue();
4334 }
4335
4336 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4337   SDLoc DL(N);
4338   EVT VT = N->getValueType(0);
4339
4340   // fold (udiv x, (1 << c)) -> x >>u c
4341   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4342       DAG.isKnownToBeAPowerOfTwo(N1)) {
4343     SDValue LogBase2 = BuildLogBase2(N1, DL);
4344     AddToWorklist(LogBase2.getNode());
4345
4346     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4347     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4348     AddToWorklist(Trunc.getNode());
4349     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4350   }
4351
4352   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4353   if (N1.getOpcode() == ISD::SHL) {
4354     SDValue N10 = N1.getOperand(0);
4355     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4356         DAG.isKnownToBeAPowerOfTwo(N10)) {
4357       SDValue LogBase2 = BuildLogBase2(N10, DL);
4358       AddToWorklist(LogBase2.getNode());
4359
4360       EVT ADDVT = N1.getOperand(1).getValueType();
4361       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4362       AddToWorklist(Trunc.getNode());
4363       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4364       AddToWorklist(Add.getNode());
4365       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4366     }
4367   }
4368
4369   // fold (udiv x, c) -> alternate
4370   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4371   if (isConstantOrConstantVector(N1) &&
4372       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4373     if (SDValue Op = BuildUDIV(N))
4374       return Op;
4375
4376   return SDValue();
4377 }
4378
4379 // handles ISD::SREM and ISD::UREM
     // The folds below are attempted in order; an empty SDValue is returned when
     // none applies.
4380 SDValue DAGCombiner::visitREM(SDNode *N) {
4381   unsigned Opcode = N->getOpcode();
4382   SDValue N0 = N->getOperand(0);
4383   SDValue N1 = N->getOperand(1);
4384   EVT VT = N->getValueType(0);
       // Result type of the setcc node built for VT operands below.
4385   EVT CCVT = getSetCCResultType(VT);
4386
4387   bool isSigned = (Opcode == ISD::SREM);
4388   SDLoc DL(N);
4389
4390   // fold (rem c1, c2) -> c1%c2
       // N1C is the constant (or constant splat) divisor, if there is one.
4391   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4392   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4393     return C;
4394
4395   // fold (urem X, -1) -> select(X == -1, 0, x)
4396   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4397     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4398                          DAG.getConstant(0, DL, VT), N0);
4399
       // Simplifications shared with the div opcodes.
4400   if (SDValue V = simplifyDivRem(N, DAG))
4401     return V;
4402
4403   if (SDValue NewSel = foldBinOpIntoSelect(N))
4404     return NewSel;
4405
4406   if (isSigned) {
4407     // If we know the sign bits of both operands are zero, strength reduce to a
4408     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4409     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4410       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4411   } else {
4412     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4413       // fold (urem x, pow2) -> (and x, pow2-1)
4414       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4415       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4416       AddToWorklist(Add.getNode());
4417       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4418     }
4419     if (N1.getOpcode() == ISD::SHL &&
4420         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4421       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4422       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4423       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4424       AddToWorklist(Add.getNode());
4425       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4426     }
4427   }
4428
4429   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4430
4431   // If X/C can be simplified by the division-by-constant logic, lower
4432   // X%C to the equivalent of X-X/C*C.
4433   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4434   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4435   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4436   // combine will not return a DIVREM.  Regardless, checking cheapness here
4437   // makes sense since the simplification results in fatter code.
4438   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4439     SDValue OptimizedDiv =
4440         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4441     if (OptimizedDiv.getNode()) {
4442       // If the equivalent Div node also exists, update its users.
4443       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4444       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4445                                                 { N0, N1 }))
4446         CombineTo(DivNode, OptimizedDiv);
           // Remainder = N0 - (N0 / N1) * N1, built from the optimized quotient.
4447       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4448       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4449       AddToWorklist(OptimizedDiv.getNode());
4450       AddToWorklist(Mul.getNode());
4451       return Sub;
4452     }
4453   }
4454
4455   // sdiv, srem -> sdivrem (or the unsigned equivalents)
       // The remainder is result 1 of the DIVREM value.
4456   if (SDValue DivRem = useDivRem(N))
4457     return DivRem.getValue(1);
4458
4459   return SDValue();
4460 }
4461
4462 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4463   SDValue N0 = N->getOperand(0);
4464   SDValue N1 = N->getOperand(1);
4465   EVT VT = N->getValueType(0);
4466   SDLoc DL(N);
4467
4468   if (VT.isVector()) {
4469     // fold (mulhs x, 0) -> 0
4470     // do not return N0/N1, because undef node may exist.
4471     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4472         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4473       return DAG.getConstant(0, DL, VT);
4474   }
4475
4476   // fold (mulhs c1, c2)
4477   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4478     return C;
4479
4480   // fold (mulhs x, 0) -> 0
4481   if (isNullConstant(N1))
4482     return N1;
4483   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4484   if (isOneConstant(N1))
4485     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4486                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4487                                        getShiftAmountTy(N0.getValueType())));
4488
4489   // fold (mulhs x, undef) -> 0
4490   if (N0.isUndef() || N1.isUndef())
4491     return DAG.getConstant(0, DL, VT);
4492
4493   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4494   // plus a shift.
4495   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4496       !VT.isVector()) {
4497     MVT Simple = VT.getSimpleVT();
4498     unsigned SimpleSize = Simple.getSizeInBits();
4499     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4500     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4501       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4502       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4503       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4504       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4505             DAG.getConstant(SimpleSize, DL,
4506                             getShiftAmountTy(N1.getValueType())));
4507       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4508     }
4509   }
4510
4511   return SDValue();
4512 }
4513
4514 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4515   SDValue N0 = N->getOperand(0);
4516   SDValue N1 = N->getOperand(1);
4517   EVT VT = N->getValueType(0);
4518   SDLoc DL(N);
4519
4520   if (VT.isVector()) {
4521     // fold (mulhu x, 0) -> 0
4522     // do not return N0/N1, because undef node may exist.
4523     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4524         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4525       return DAG.getConstant(0, DL, VT);
4526   }
4527
4528   // fold (mulhu c1, c2)
4529   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4530     return C;
4531
4532   // fold (mulhu x, 0) -> 0
4533   if (isNullConstant(N1))
4534     return N1;
4535   // fold (mulhu x, 1) -> 0
4536   if (isOneConstant(N1))
4537     return DAG.getConstant(0, DL, N0.getValueType());
4538   // fold (mulhu x, undef) -> 0
4539   if (N0.isUndef() || N1.isUndef())
4540     return DAG.getConstant(0, DL, VT);
4541
4542   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4543   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4544       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4545     unsigned NumEltBits = VT.getScalarSizeInBits();
4546     SDValue LogBase2 = BuildLogBase2(N1, DL);
4547     SDValue SRLAmt = DAG.getNode(
4548         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4549     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4550     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4551     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4552   }
4553
4554   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4555   // plus a shift.
4556   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4557       !VT.isVector()) {
4558     MVT Simple = VT.getSimpleVT();
4559     unsigned SimpleSize = Simple.getSizeInBits();
4560     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4561     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4562       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4563       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4564       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4565       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4566             DAG.getConstant(SimpleSize, DL,
4567                             getShiftAmountTy(N1.getValueType())));
4568       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4569     }
4570   }
4571
4572   return SDValue();
4573 }
4574
4575 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4576 /// give the opcodes for the two computations that are being performed. Return
4577 /// true if a simplification was made.
4578 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4579                                                 unsigned HiOp) {
4580   // If the high half is not needed, just compute the low half.
4581   bool HiExists = N->hasAnyUseOfValue(1);
4582   if (!HiExists && (!LegalOperations ||
4583                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4584     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4585     return CombineTo(N, Res, Res);
4586   }
4587
4588   // If the low half is not needed, just compute the high half.
4589   bool LoExists = N->hasAnyUseOfValue(0);
4590   if (!LoExists && (!LegalOperations ||
4591                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4592     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4593     return CombineTo(N, Res, Res);
4594   }
4595
4596   // If both halves are used, return as it is.
4597   if (LoExists && HiExists)
4598     return SDValue();
4599
4600   // If the two computed results can be simplified separately, separate them.
4601   if (LoExists) {
4602     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4603     AddToWorklist(Lo.getNode());
4604     SDValue LoOpt = combine(Lo.getNode());
4605     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4606         (!LegalOperations ||
4607          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4608       return CombineTo(N, LoOpt, LoOpt);
4609   }
4610
4611   if (HiExists) {
4612     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4613     AddToWorklist(Hi.getNode());
4614     SDValue HiOpt = combine(Hi.getNode());
4615     if (HiOpt.getNode() && HiOpt != Hi &&
4616         (!LegalOperations ||
4617          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4618       return CombineTo(N, HiOpt, HiOpt);
4619   }
4620
4621   return SDValue();
4622 }
4623
4624 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4625   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4626     return Res;
4627
4628   EVT VT = N->getValueType(0);
4629   SDLoc DL(N);
4630
4631   // If the type is twice as wide is legal, transform the mulhu to a wider
4632   // multiply plus a shift.
4633   if (VT.isSimple() && !VT.isVector()) {
4634     MVT Simple = VT.getSimpleVT();
4635     unsigned SimpleSize = Simple.getSizeInBits();
4636     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4637     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4638       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4639       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4640       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4641       // Compute the high part as N1.
4642       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4643             DAG.getConstant(SimpleSize, DL,
4644                             getShiftAmountTy(Lo.getValueType())));
4645       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4646       // Compute the low part as N0.
4647       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4648       return CombineTo(N, Lo, Hi);
4649     }
4650   }
4651
4652   return SDValue();
4653 }
4654
4655 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4656   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4657     return Res;
4658
4659   EVT VT = N->getValueType(0);
4660   SDLoc DL(N);
4661
4662   // (umul_lohi N0, 0) -> (0, 0)
4663   if (isNullConstant(N->getOperand(1))) {
4664     SDValue Zero = DAG.getConstant(0, DL, VT);
4665     return CombineTo(N, Zero, Zero);
4666   }
4667
4668   // (umul_lohi N0, 1) -> (N0, 0)
4669   if (isOneConstant(N->getOperand(1))) {
4670     SDValue Zero = DAG.getConstant(0, DL, VT);
4671     return CombineTo(N, N->getOperand(0), Zero);
4672   }
4673
4674   // If the type is twice as wide is legal, transform the mulhu to a wider
4675   // multiply plus a shift.
4676   if (VT.isSimple() && !VT.isVector()) {
4677     MVT Simple = VT.getSimpleVT();
4678     unsigned SimpleSize = Simple.getSizeInBits();
4679     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4680     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4681       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4682       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4683       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4684       // Compute the high part as N1.
4685       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4686             DAG.getConstant(SimpleSize, DL,
4687                             getShiftAmountTy(Lo.getValueType())));
4688       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4689       // Compute the low part as N0.
4690       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4691       return CombineTo(N, Lo, Hi);
4692     }
4693   }
4694
4695   return SDValue();
4696 }
4697
4698 SDValue DAGCombiner::visitMULO(SDNode *N) {
4699   SDValue N0 = N->getOperand(0);
4700   SDValue N1 = N->getOperand(1);
4701   EVT VT = N0.getValueType();
4702   bool IsSigned = (ISD::SMULO == N->getOpcode());
4703
4704   EVT CarryVT = N->getValueType(1);
4705   SDLoc DL(N);
4706
4707   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4708   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4709
4710   // fold operation with constant operands.
4711   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4712   // multiple results.
4713   if (N0C && N1C) {
4714     bool Overflow;
4715     APInt Result =
4716         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4717                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4718     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4719                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4720   }
4721
4722   // canonicalize constant to RHS.
4723   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4724       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4725     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4726
4727   // fold (mulo x, 0) -> 0 + no carry out
4728   if (isNullOrNullSplat(N1))
4729     return CombineTo(N, DAG.getConstant(0, DL, VT),
4730                      DAG.getConstant(0, DL, CarryVT));
4731
4732   // (mulo x, 2) -> (addo x, x)
4733   if (N1C && N1C->getAPIntValue() == 2)
4734     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4735                        N->getVTList(), N0, N0);
4736
4737   if (IsSigned) {
4738     // A 1 bit SMULO overflows if both inputs are 1.
4739     if (VT.getScalarSizeInBits() == 1) {
4740       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4741       return CombineTo(N, And,
4742                        DAG.getSetCC(DL, CarryVT, And,
4743                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4744     }
4745
4746     // Multiplying n * m significant bits yields a result of n + m significant
4747     // bits. If the total number of significant bits does not exceed the
4748     // result bit width (minus 1), there is no overflow.
4749     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4750     if (SignBits > 1)
4751       SignBits += DAG.ComputeNumSignBits(N1);
4752     if (SignBits > VT.getScalarSizeInBits() + 1)
4753       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4754                        DAG.getConstant(0, DL, CarryVT));
4755   } else {
4756     KnownBits N1Known = DAG.computeKnownBits(N1);
4757     KnownBits N0Known = DAG.computeKnownBits(N0);
4758     bool Overflow;
4759     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4760     if (!Overflow)
4761       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4762                        DAG.getConstant(0, DL, CarryVT));
4763   }
4764
4765   return SDValue();
4766 }
4767
4768 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4769   SDValue N0 = N->getOperand(0);
4770   SDValue N1 = N->getOperand(1);
4771   EVT VT = N0.getValueType();
4772   unsigned Opcode = N->getOpcode();
4773
4774   // fold vector ops
4775   if (VT.isVector())
4776     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4777       return FoldedVOp;
4778
4779   // fold operation with constant operands.
4780   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4781     return C;
4782
4783   // canonicalize constant to RHS
4784   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4785       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4786     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4787
4788   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4789   // Only do this if the current op isn't legal and the flipped is.
4790   if (!TLI.isOperationLegal(Opcode, VT) &&
4791       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4792       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4793     unsigned AltOpcode;
4794     switch (Opcode) {
4795     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4796     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4797     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4798     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4799     default: llvm_unreachable("Unknown MINMAX opcode");
4800     }
4801     if (TLI.isOperationLegal(AltOpcode, VT))
4802       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4803   }
4804
4805   // Simplify the operands using demanded-bits information.
4806   if (SimplifyDemandedBits(SDValue(N, 0)))
4807     return SDValue(N, 0);
4808
4809   return SDValue();
4810 }
4811
4812 /// If this is a bitwise logic instruction and both operands have the same
4813 /// opcode, try to sink the other opcode after the logic instruction.
4814 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4815   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4816   EVT VT = N0.getValueType();
4817   unsigned LogicOpcode = N->getOpcode();
4818   unsigned HandOpcode = N0.getOpcode();
4819   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4820           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4821   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4822
4823   // Bail early if none of these transforms apply.
4824   if (N0.getNumOperands() == 0)
4825     return SDValue();
4826
4827   // FIXME: We should check number of uses of the operands to not increase
4828   //        the instruction count for all transforms.
4829
4830   // Handle size-changing casts.
4831   SDValue X = N0.getOperand(0);
4832   SDValue Y = N1.getOperand(0);
4833   EVT XVT = X.getValueType();
4834   SDLoc DL(N);
4835   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4836       HandOpcode == ISD::SIGN_EXTEND) {
4837     // If both operands have other uses, this transform would create extra
4838     // instructions without eliminating anything.
4839     if (!N0.hasOneUse() && !N1.hasOneUse())
4840       return SDValue();
4841     // We need matching integer source types.
4842     if (XVT != Y.getValueType())
4843       return SDValue();
4844     // Don't create an illegal op during or after legalization. Don't ever
4845     // create an unsupported vector op.
4846     if ((VT.isVector() || LegalOperations) &&
4847         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4848       return SDValue();
4849     // Avoid infinite looping with PromoteIntBinOp.
4850     // TODO: Should we apply desirable/legal constraints to all opcodes?
4851     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4852         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4853       return SDValue();
4854     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4855     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4856     return DAG.getNode(HandOpcode, DL, VT, Logic);
4857   }
4858
4859   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4860   if (HandOpcode == ISD::TRUNCATE) {
4861     // If both operands have other uses, this transform would create extra
4862     // instructions without eliminating anything.
4863     if (!N0.hasOneUse() && !N1.hasOneUse())
4864       return SDValue();
4865     // We need matching source types.
4866     if (XVT != Y.getValueType())
4867       return SDValue();
4868     // Don't create an illegal op during or after legalization.
4869     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4870       return SDValue();
4871     // Be extra careful sinking truncate. If it's free, there's no benefit in
4872     // widening a binop. Also, don't create a logic op on an illegal type.
4873     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4874       return SDValue();
4875     if (!TLI.isTypeLegal(XVT))
4876       return SDValue();
4877     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4878     return DAG.getNode(HandOpcode, DL, VT, Logic);
4879   }
4880
4881   // For binops SHL/SRL/SRA/AND:
4882   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4883   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4884        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4885       N0.getOperand(1) == N1.getOperand(1)) {
4886     // If either operand has other uses, this transform is not an improvement.
4887     if (!N0.hasOneUse() || !N1.hasOneUse())
4888       return SDValue();
4889     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4890     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4891   }
4892
4893   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4894   if (HandOpcode == ISD::BSWAP) {
4895     // If either operand has other uses, this transform is not an improvement.
4896     if (!N0.hasOneUse() || !N1.hasOneUse())
4897       return SDValue();
4898     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4899     return DAG.getNode(HandOpcode, DL, VT, Logic);
4900   }
4901
4902   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4903   // Only perform this optimization up until type legalization, before
4904   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4905   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4906   // we don't want to undo this promotion.
4907   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4908   // on scalars.
4909   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4910        Level <= AfterLegalizeTypes) {
4911     // Input types must be integer and the same.
4912     if (XVT.isInteger() && XVT == Y.getValueType() &&
4913         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4914           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4915       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4916       return DAG.getNode(HandOpcode, DL, VT, Logic);
4917     }
4918   }
4919
4920   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4921   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4922   // If both shuffles use the same mask, and both shuffle within a single
4923   // vector, then it is worthwhile to move the swizzle after the operation.
4924   // The type-legalizer generates this pattern when loading illegal
4925   // vector types from memory. In many cases this allows additional shuffle
4926   // optimizations.
4927   // There are other cases where moving the shuffle after the xor/and/or
4928   // is profitable even if shuffles don't perform a swizzle.
4929   // If both shuffles use the same mask, and both shuffles have the same first
4930   // or second operand, then it might still be profitable to move the shuffle
4931   // after the xor/and/or operation.
4932   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4933     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4934     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4935     assert(X.getValueType() == Y.getValueType() &&
4936            "Inputs to shuffles are not the same type");
4937
4938     // Check that both shuffles use the same mask. The masks are known to be of
4939     // the same length because the result vector type is the same.
4940     // Check also that shuffles have only one use to avoid introducing extra
4941     // instructions.
4942     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4943         !SVN0->getMask().equals(SVN1->getMask()))
4944       return SDValue();
4945
4946     // Don't try to fold this node if it requires introducing a
4947     // build vector of all zeros that might be illegal at this stage.
4948     SDValue ShOp = N0.getOperand(1);
4949     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4950       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4951
4952     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4953     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4954       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4955                                   N0.getOperand(0), N1.getOperand(0));
4956       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4957     }
4958
4959     // Don't try to fold this node if it requires introducing a
4960     // build vector of all zeros that might be illegal at this stage.
4961     ShOp = N0.getOperand(0);
4962     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4963       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4964
4965     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4966     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4967       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4968                                   N1.getOperand(1));
4969       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4970     }
4971   }
4972
4973   return SDValue();
4974 }
4975
4976 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4977 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4978                                        const SDLoc &DL) {
4979   SDValue LL, LR, RL, RR, N0CC, N1CC;
4980   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4981       !isSetCCEquivalent(N1, RL, RR, N1CC))
4982     return SDValue();
4983
4984   assert(N0.getValueType() == N1.getValueType() &&
4985          "Unexpected operand types for bitwise logic op");
4986   assert(LL.getValueType() == LR.getValueType() &&
4987          RL.getValueType() == RR.getValueType() &&
4988          "Unexpected operand types for setcc");
4989
4990   // If we're here post-legalization or the logic op type is not i1, the logic
4991   // op type must match a setcc result type. Also, all folds require new
4992   // operations on the left and right operands, so those types must match.
4993   EVT VT = N0.getValueType();
4994   EVT OpVT = LL.getValueType();
4995   if (LegalOperations || VT.getScalarType() != MVT::i1)
4996     if (VT != getSetCCResultType(OpVT))
4997       return SDValue();
4998   if (OpVT != RL.getValueType())
4999     return SDValue();
5000
5001   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5002   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5003   bool IsInteger = OpVT.isInteger();
5004   if (LR == RR && CC0 == CC1 && IsInteger) {
5005     bool IsZero = isNullOrNullSplat(LR);
5006     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5007
5008     // All bits clear?
5009     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5010     // All sign bits clear?
5011     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5012     // Any bits set?
5013     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5014     // Any sign bits set?
5015     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5016
5017     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5018     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5019     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5020     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5021     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5022       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5023       AddToWorklist(Or.getNode());
5024       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5025     }
5026
5027     // All bits set?
5028     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5029     // All sign bits set?
5030     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5031     // Any bits clear?
5032     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5033     // Any sign bits clear?
5034     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5035
5036     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5037     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5038     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5039     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
5040     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5041       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5042       AddToWorklist(And.getNode());
5043       return DAG.getSetCC(DL, VT, And, LR, CC1);
5044     }
5045   }
5046
5047   // TODO: What is the 'or' equivalent of this fold?
5048   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5049   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5050       IsInteger && CC0 == ISD::SETNE &&
5051       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5052        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5053     SDValue One = DAG.getConstant(1, DL, OpVT);
5054     SDValue Two = DAG.getConstant(2, DL, OpVT);
5055     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5056     AddToWorklist(Add.getNode());
5057     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5058   }
5059
5060   // Try more general transforms if the predicates match and the only user of
5061   // the compares is the 'and' or 'or'.
5062   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5063       N0.hasOneUse() && N1.hasOneUse()) {
5064     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5065     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5066     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5067       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5068       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5069       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5070       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5071       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5072     }
5073
5074     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5075     // TODO - support non-uniform vector amounts.
5076     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5077       // Match a shared variable operand and 2 non-opaque constant operands.
5078       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5079       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5080       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5081         const APInt &CMax =
5082             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5083         const APInt &CMin =
5084             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5085         // The difference of the constants must be a single bit.
5086         if ((CMax - CMin).isPowerOf2()) {
5087           // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5088           // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5089           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5090           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5091           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5092           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5093           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5094           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5095           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5096           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5097         }
5098       }
5099     }
5100   }
5101
5102   // Canonicalize equivalent operands to LL == RL.
5103   if (LL == RR && LR == RL) {
5104     CC1 = ISD::getSetCCSwappedOperands(CC1);
5105     std::swap(RL, RR);
5106   }
5107
5108   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5109   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5110   if (LL == RL && LR == RR) {
5111     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5112                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5113     if (NewCC != ISD::SETCC_INVALID &&
5114         (!LegalOperations ||
5115          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5116           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5117       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5118   }
5119
5120   return SDValue();
5121 }
5122
5123 /// This contains all DAGCombine rules which reduce two values combined by
5124 /// an And operation to a single value. This makes them reusable in the context
5125 /// of visitSELECT(). Rules involving constants are not included as
5126 /// visitSELECT() already handles those cases.
5127 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5128   EVT VT = N1.getValueType();
5129   SDLoc DL(N);
5130
5131   // fold (and x, undef) -> 0
5132   if (N0.isUndef() || N1.isUndef())
5133     return DAG.getConstant(0, DL, VT);
5134
5135   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5136     return V;
5137
5138   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5139       VT.getSizeInBits() <= 64) {
5140     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5141       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5142         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5143         // immediate for an add, but it is legal if its top c2 bits are set,
5144         // transform the ADD so the immediate doesn't need to be materialized
5145         // in a register.
5146         APInt ADDC = ADDI->getAPIntValue();
5147         APInt SRLC = SRLI->getAPIntValue();
5148         if (ADDC.getMinSignedBits() <= 64 &&
5149             SRLC.ult(VT.getSizeInBits()) &&
5150             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5151           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5152                                              SRLC.getZExtValue());
5153           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5154             ADDC |= Mask;
5155             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5156               SDLoc DL0(N0);
5157               SDValue NewAdd =
5158                 DAG.getNode(ISD::ADD, DL0, VT,
5159                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5160               CombineTo(N0.getNode(), NewAdd);
5161               // Return N so it doesn't get rechecked!
5162               return SDValue(N, 0);
5163             }
5164           }
5165         }
5166       }
5167     }
5168   }
5169
5170   // Reduce bit extract of low half of an integer to the narrower type.
5171   // (and (srl i64:x, K), KMask) ->
5172   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5173   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5174     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5175       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5176         unsigned Size = VT.getSizeInBits();
5177         const APInt &AndMask = CAnd->getAPIntValue();
5178         unsigned ShiftBits = CShift->getZExtValue();
5179
5180         // Bail out, this node will probably disappear anyway.
5181         if (ShiftBits == 0)
5182           return SDValue();
5183
5184         unsigned MaskBits = AndMask.countTrailingOnes();
5185         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5186
5187         if (AndMask.isMask() &&
5188             // Required bits must not span the two halves of the integer and
5189             // must fit in the half size type.
5190             (ShiftBits + MaskBits <= Size / 2) &&
5191             TLI.isNarrowingProfitable(VT, HalfVT) &&
5192             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5193             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5194             TLI.isTruncateFree(VT, HalfVT) &&
5195             TLI.isZExtFree(HalfVT, VT)) {
5196           // The isNarrowingProfitable is to avoid regressions on PPC and
5197           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5198           // on downstream users of this. Those patterns could probably be
5199           // extended to handle extensions mixed in.
5200
5201           SDValue SL(N0);
5202           assert(MaskBits <= Size);
5203
5204           // Extracting the highest bit of the low half.
5205           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5206           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5207                                       N0.getOperand(0));
5208
5209           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5210           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5211           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5212           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5213           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5214         }
5215       }
5216     }
5217   }
5218
5219   return SDValue();
5220 }
5221
5222 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5223                                    EVT LoadResultTy, EVT &ExtVT) {
5224   if (!AndC->getAPIntValue().isMask())
5225     return false;
5226
5227   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5228
5229   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5230   EVT LoadedVT = LoadN->getMemoryVT();
5231
5232   if (ExtVT == LoadedVT &&
5233       (!LegalOperations ||
5234        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5235     // ZEXTLOAD will match without needing to change the size of the value being
5236     // loaded.
5237     return true;
5238   }
5239
5240   // Do not change the width of a volatile or atomic loads.
5241   if (!LoadN->isSimple())
5242     return false;
5243
5244   // Do not generate loads of non-round integer types since these can
5245   // be expensive (and would be wrong if the type is not byte sized).
5246   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5247     return false;
5248
5249   if (LegalOperations &&
5250       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5251     return false;
5252
5253   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5254     return false;
5255
5256   return true;
5257 }
5258
5259 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5260                                     ISD::LoadExtType ExtType, EVT &MemVT,
5261                                     unsigned ShAmt) {
5262   if (!LDST)
5263     return false;
5264   // Only allow byte offsets.
5265   if (ShAmt % 8)
5266     return false;
5267
5268   // Do not generate loads of non-round integer types since these can
5269   // be expensive (and would be wrong if the type is not byte sized).
5270   if (!MemVT.isRound())
5271     return false;
5272
5273   // Don't change the width of a volatile or atomic loads.
5274   if (!LDST->isSimple())
5275     return false;
5276
5277   EVT LdStMemVT = LDST->getMemoryVT();
5278
5279   // Bail out when changing the scalable property, since we can't be sure that
5280   // we're actually narrowing here.
5281   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5282     return false;
5283
5284   // Verify that we are actually reducing a load width here.
5285   if (LdStMemVT.bitsLT(MemVT))
5286     return false;
5287
5288   // Ensure that this isn't going to produce an unsupported memory access.
5289   if (ShAmt) {
5290     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5291     const unsigned ByteShAmt = ShAmt / 8;
5292     const Align LDSTAlign = LDST->getAlign();
5293     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5294     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5295                                 LDST->getAddressSpace(), NarrowAlign,
5296                                 LDST->getMemOperand()->getFlags()))
5297       return false;
5298   }
5299
5300   // It's not possible to generate a constant of extended or untyped type.
5301   EVT PtrType = LDST->getBasePtr().getValueType();
5302   if (PtrType == MVT::Untyped || PtrType.isExtended())
5303     return false;
5304
5305   if (isa<LoadSDNode>(LDST)) {
5306     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5307     // Don't transform one with multiple uses, this would require adding a new
5308     // load.
5309     if (!SDValue(Load, 0).hasOneUse())
5310       return false;
5311
5312     if (LegalOperations &&
5313         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5314       return false;
5315
5316     // For the transform to be legal, the load must produce only two values
5317     // (the value loaded and the chain).  Don't transform a pre-increment
5318     // load, for example, which produces an extra value.  Otherwise the
5319     // transformation is not equivalent, and the downstream logic to replace
5320     // uses gets things wrong.
5321     if (Load->getNumValues() > 2)
5322       return false;
5323
5324     // If the load that we're shrinking is an extload and we're not just
5325     // discarding the extension we can't simply shrink the load. Bail.
5326     // TODO: It would be possible to merge the extensions in some cases.
5327     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5328         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5329       return false;
5330
5331     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5332       return false;
5333   } else {
5334     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5335     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5336     // Can't write outside the original store
5337     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5338       return false;
5339
5340     if (LegalOperations &&
5341         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5342       return false;
5343   }
5344   return true;
5345 }
5346
5347 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5348                                     SmallVectorImpl<LoadSDNode*> &Loads,
5349                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5350                                     ConstantSDNode *Mask,
5351                                     SDNode *&NodeToMask) {
5352   // Recursively search for the operands, looking for loads which can be
5353   // narrowed.
5354   for (SDValue Op : N->op_values()) {
5355     if (Op.getValueType().isVector())
5356       return false;
5357
5358     // Some constants may need fixing up later if they are too large.
5359     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5360       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5361           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5362         NodesWithConsts.insert(N);
5363       continue;
5364     }
5365
5366     if (!Op.hasOneUse())
5367       return false;
5368
5369     switch(Op.getOpcode()) {
5370     case ISD::LOAD: {
5371       auto *Load = cast<LoadSDNode>(Op);
5372       EVT ExtVT;
5373       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5374           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5375
5376         // ZEXTLOAD is already small enough.
5377         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5378             ExtVT.bitsGE(Load->getMemoryVT()))
5379           continue;
5380
5381         // Use LE to convert equal sized loads to zext.
5382         if (ExtVT.bitsLE(Load->getMemoryVT()))
5383           Loads.push_back(Load);
5384
5385         continue;
5386       }
5387       return false;
5388     }
5389     case ISD::ZERO_EXTEND:
5390     case ISD::AssertZext: {
5391       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5392       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5393       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5394         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5395         Op.getOperand(0).getValueType();
5396
5397       // We can accept extending nodes if the mask is wider or an equal
5398       // width to the original type.
5399       if (ExtVT.bitsGE(VT))
5400         continue;
5401       break;
5402     }
5403     case ISD::OR:
5404     case ISD::XOR:
5405     case ISD::AND:
5406       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5407                              NodeToMask))
5408         return false;
5409       continue;
5410     }
5411
5412     // Allow one node which will masked along with any loads found.
5413     if (NodeToMask)
5414       return false;
5415
5416     // Also ensure that the node to be masked only produces one data result.
5417     NodeToMask = Op.getNode();
5418     if (NodeToMask->getNumValues() > 1) {
5419       bool HasValue = false;
5420       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5421         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5422         if (VT != MVT::Glue && VT != MVT::Other) {
5423           if (HasValue) {
5424             NodeToMask = nullptr;
5425             return false;
5426           }
5427           HasValue = true;
5428         }
5429       }
5430       assert(HasValue && "Node to be masked has no data result?");
5431     }
5432   }
5433   return true;
5434 }
5435
5436 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5437   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5438   if (!Mask)
5439     return false;
5440
5441   if (!Mask->getAPIntValue().isMask())
5442     return false;
5443
5444   // No need to do anything if the and directly uses a load.
5445   if (isa<LoadSDNode>(N->getOperand(0)))
5446     return false;
5447
5448   SmallVector<LoadSDNode*, 8> Loads;
5449   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5450   SDNode *FixupNode = nullptr;
5451   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5452     if (Loads.size() == 0)
5453       return false;
5454
5455     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5456     SDValue MaskOp = N->getOperand(1);
5457
5458     // If it exists, fixup the single node we allow in the tree that needs
5459     // masking.
5460     if (FixupNode) {
5461       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5462       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5463                                 FixupNode->getValueType(0),
5464                                 SDValue(FixupNode, 0), MaskOp);
5465       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5466       if (And.getOpcode() == ISD ::AND)
5467         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5468     }
5469
5470     // Narrow any constants that need it.
5471     for (auto *LogicN : NodesWithConsts) {
5472       SDValue Op0 = LogicN->getOperand(0);
5473       SDValue Op1 = LogicN->getOperand(1);
5474
5475       if (isa<ConstantSDNode>(Op0))
5476           std::swap(Op0, Op1);
5477
5478       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5479                                 Op1, MaskOp);
5480
5481       DAG.UpdateNodeOperands(LogicN, Op0, And);
5482     }
5483
5484     // Create narrow loads.
5485     for (auto *Load : Loads) {
5486       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5487       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5488                                 SDValue(Load, 0), MaskOp);
5489       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5490       if (And.getOpcode() == ISD ::AND)
5491         And = SDValue(
5492             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5493       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5494       assert(NewLoad &&
5495              "Shouldn't be masking the load if it can't be narrowed");
5496       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5497     }
5498     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5499     return true;
5500   }
5501   return false;
5502 }
5503
5504 // Unfold
5505 //    x &  (-1 'logical shift' y)
5506 // To
5507 //    (x 'opposite logical shift' y) 'logical shift' y
5508 // if it is better for performance.
5509 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5510   assert(N->getOpcode() == ISD::AND);
5511
5512   SDValue N0 = N->getOperand(0);
5513   SDValue N1 = N->getOperand(1);
5514
5515   // Do we actually prefer shifts over mask?
5516   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5517     return SDValue();
5518
5519   // Try to match  (-1 '[outer] logical shift' y)
5520   unsigned OuterShift;
5521   unsigned InnerShift; // The opposite direction to the OuterShift.
5522   SDValue Y;           // Shift amount.
5523   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5524     if (!M.hasOneUse())
5525       return false;
5526     OuterShift = M->getOpcode();
5527     if (OuterShift == ISD::SHL)
5528       InnerShift = ISD::SRL;
5529     else if (OuterShift == ISD::SRL)
5530       InnerShift = ISD::SHL;
5531     else
5532       return false;
5533     if (!isAllOnesConstant(M->getOperand(0)))
5534       return false;
5535     Y = M->getOperand(1);
5536     return true;
5537   };
5538
5539   SDValue X;
5540   if (matchMask(N1))
5541     X = N0;
5542   else if (matchMask(N0))
5543     X = N1;
5544   else
5545     return SDValue();
5546
5547   SDLoc DL(N);
5548   EVT VT = N->getValueType(0);
5549
5550   //     tmp = x   'opposite logical shift' y
5551   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5552   //     ret = tmp 'logical shift' y
5553   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5554
5555   return T1;
5556 }
5557
5558 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5559 /// For a target with a bit test, this is expected to become test + set and save
5560 /// at least 1 instruction.
5561 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5562   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5563
5564   // This is probably not worthwhile without a supported type.
5565   EVT VT = And->getValueType(0);
5566   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5567   if (!TLI.isTypeLegal(VT))
5568     return SDValue();
5569
5570   // Look through an optional extension and find a 'not'.
5571   // TODO: Should we favor test+set even without the 'not' op?
5572   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5573   if (Not.getOpcode() == ISD::ANY_EXTEND)
5574     Not = Not.getOperand(0);
5575   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5576     return SDValue();
5577
5578   // Look though an optional truncation. The source operand may not be the same
5579   // type as the original 'and', but that is ok because we are masking off
5580   // everything but the low bit.
5581   SDValue Srl = Not.getOperand(0);
5582   if (Srl.getOpcode() == ISD::TRUNCATE)
5583     Srl = Srl.getOperand(0);
5584
5585   // Match a shift-right by constant.
5586   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5587       !isa<ConstantSDNode>(Srl.getOperand(1)))
5588     return SDValue();
5589
5590   // We might have looked through casts that make this transform invalid.
5591   // TODO: If the source type is wider than the result type, do the mask and
5592   //       compare in the source type.
5593   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5594   unsigned VTBitWidth = VT.getSizeInBits();
5595   if (ShiftAmt.uge(VTBitWidth))
5596     return SDValue();
5597
5598   // Turn this into a bit-test pattern using mask op + setcc:
5599   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5600   SDLoc DL(And);
5601   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5602   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5603   SDValue Mask = DAG.getConstant(
5604       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5605   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5606   SDValue Zero = DAG.getConstant(0, DL, VT);
5607   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5608   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5609 }
5610
/// Visit an AND node \p N and try the folds below one after another.
///
/// \returns the replacement value when a fold produced one, `SDValue(N, 0)`
/// when the DAG was already updated in place (via CombineTo or a use
/// replacement) so that N is not rechecked, or an empty SDValue when no fold
/// applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      return N0;

    // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
    auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
    auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
    if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
        N0.hasOneUse() && N1.hasOneUse()) {
      EVT LoadVT = MLoad->getMemoryVT();
      EVT ExtVT = VT;
      if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
        // For this AND to be a zero extension of the masked load the elements
        // of the BuildVec must mask the bottom bits of the extended element
        // type
        if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
          uint64_t ElementSize =
              LoadVT.getVectorElementType().getScalarSizeInBits();
          if (Splat->getAPIntValue().isMask(ElementSize)) {
            // Rebuild the masked load with identical operands, only switching
            // the extension type to ZEXTLOAD.
            return DAG.getMaskedLoad(
                ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
                MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
                LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
                ISD::ZEXTLOAD, MLoad->isExpandingLoad());
          }
        }
      }
    }
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);

  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;

  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
    return Combined;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Mask holds the bits the 'and' clears, narrowed to the width of V.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // The initial 1-bit zero is kept when N1 is neither; after the resize
    // below it is still zero, so the all-ones test cannot succeed.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (EltBitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
               SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if ((SplatBitSize % EltBitWidth) == 0) {
          Constant = APInt::getAllOnesValue(EltBitWidth);
          for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
            Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: may the AND be dropped for this load's extension kind? Always for
    // non-extending and zero-extending loads, for EXTLOAD only if a ZEXTLOAD
    // is legal, never for any other extension type.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      // If N0 is not the load itself (the extract_vector_elt case), the AND is
      // replaced by N0 rather than by the load.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (masked_gather x)) -> (zext_masked_gather x)
  if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    EVT MemVT = GN0->getMemoryVT();
    EVT ScalarVT = MemVT.getScalarType();

    if (SDValue(GN0, 0).hasOneUse() &&
        isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
      SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
                       GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};

      SDValue ZExtLoad = DAG.getMaskedGather(
          DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
          GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);

      CombineTo(N, ZExtLoad);
      AddToWorklist(ZExtLoad.getNode());
      // Avoid recheck of N.
      return SDValue(N, 0);
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      // LN0 is the load feeding the AND, looking through the optional
      // any_extend.
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (LegalTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N))
      return SDValue(N, 0);
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
      (ISD::isEXTLoad(N0.getNode()) ||
       (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned ExtBitSize = N1.getScalarValueSizeInBits();
    unsigned MemBitSize = MemVT.getScalarSizeInBits();
    // ExtBits: the high bits of the result that lie above the memory width.
    APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
    if (DAG.MaskedValueIsZero(N1, ExtBits) &&
        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad =
          DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
                         LN0->getBasePtr(), MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  if (TLI.hasBitTest(N0, N1))
    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
      return V;

  // Recognize the following pattern:
  //
  // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
  //
  // where bitmask is a mask that clears the upper bits of AndVT. The
  // number of bits in bitmask must be a power of two.
  auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
    if (LHS->getOpcode() != ISD::SIGN_EXTEND)
      return false;

    auto *C = dyn_cast<ConstantSDNode>(RHS);
    if (!C)
      return false;

    // The mask must cover exactly the bits of the type being extended from.
    if (!C->getAPIntValue().isMask(
            LHS.getOperand(0).getValueType().getFixedSizeInBits()))
      return false;

    return true;
  };

  // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
  if (IsAndZeroExtMask(N0, N1))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));

  return SDValue();
}
5976
5977 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5978 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5979                                         bool DemandHighBits) {
5980   if (!LegalOperations)
5981     return SDValue();
5982
5983   EVT VT = N->getValueType(0);
5984   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5985     return SDValue();
5986   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5987     return SDValue();
5988
5989   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5990   bool LookPassAnd0 = false;
5991   bool LookPassAnd1 = false;
5992   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5993       std::swap(N0, N1);
5994   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5995       std::swap(N0, N1);
5996   if (N0.getOpcode() == ISD::AND) {
5997     if (!N0.getNode()->hasOneUse())
5998       return SDValue();
5999     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6000     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6001     // This is needed for X86.
6002     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6003                   N01C->getZExtValue() != 0xFFFF))
6004       return SDValue();
6005     N0 = N0.getOperand(0);
6006     LookPassAnd0 = true;
6007   }
6008
6009   if (N1.getOpcode() == ISD::AND) {
6010     if (!N1.getNode()->hasOneUse())
6011       return SDValue();
6012     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6013     if (!N11C || N11C->getZExtValue() != 0xFF)
6014       return SDValue();
6015     N1 = N1.getOperand(0);
6016     LookPassAnd1 = true;
6017   }
6018
6019   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6020     std::swap(N0, N1);
6021   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6022     return SDValue();
6023   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6024     return SDValue();
6025
6026   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6027   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6028   if (!N01C || !N11C)
6029     return SDValue();
6030   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6031     return SDValue();
6032
6033   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6034   SDValue N00 = N0->getOperand(0);
6035   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6036     if (!N00.getNode()->hasOneUse())
6037       return SDValue();
6038     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6039     if (!N001C || N001C->getZExtValue() != 0xFF)
6040       return SDValue();
6041     N00 = N00.getOperand(0);
6042     LookPassAnd0 = true;
6043   }
6044
6045   SDValue N10 = N1->getOperand(0);
6046   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6047     if (!N10.getNode()->hasOneUse())
6048       return SDValue();
6049     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6050     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6051     // for X86.
6052     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6053                    N101C->getZExtValue() != 0xFFFF))
6054       return SDValue();
6055     N10 = N10.getOperand(0);
6056     LookPassAnd1 = true;
6057   }
6058
6059   if (N00 != N10)
6060     return SDValue();
6061
6062   // Make sure everything beyond the low halfword gets set to zero since the SRL
6063   // 16 will clear the top bits.
6064   unsigned OpSizeInBits = VT.getSizeInBits();
6065   if (DemandHighBits && OpSizeInBits > 16) {
6066     // If the left-shift isn't masked out then the only way this is a bswap is
6067     // if all bits beyond the low 8 are 0. In that case the entire pattern
6068     // reduces to a left shift anyway: leave it for other parts of the combiner.
6069     if (!LookPassAnd0)
6070       return SDValue();
6071
6072     // However, if the right shift isn't masked out then it might be because
6073     // it's not needed. See if we can spot that too.
6074     if (!LookPassAnd1 &&
6075         !DAG.MaskedValueIsZero(
6076             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6077       return SDValue();
6078   }
6079
6080   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6081   if (OpSizeInBits > 16) {
6082     SDLoc DL(N);
6083     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6084                       DAG.getConstant(OpSizeInBits - 16, DL,
6085                                       getShiftAmountTy(VT)));
6086   }
6087   return Res;
6088 }
6089
6090 /// Return true if the specified node is an element that makes up a 32-bit
6091 /// packed halfword byteswap.
6092 /// ((x & 0x000000ff) << 8) |
6093 /// ((x & 0x0000ff00) >> 8) |
6094 /// ((x & 0x00ff0000) << 8) |
6095 /// ((x & 0xff000000) >> 8)
6096 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6097   if (!N.getNode()->hasOneUse())
6098     return false;
6099
6100   unsigned Opc = N.getOpcode();
6101   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6102     return false;
6103
6104   SDValue N0 = N.getOperand(0);
6105   unsigned Opc0 = N0.getOpcode();
6106   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6107     return false;
6108
6109   ConstantSDNode *N1C = nullptr;
6110   // SHL or SRL: look upstream for AND mask operand
6111   if (Opc == ISD::AND)
6112     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6113   else if (Opc0 == ISD::AND)
6114     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6115   if (!N1C)
6116     return false;
6117
6118   unsigned MaskByteOffset;
6119   switch (N1C->getZExtValue()) {
6120   default:
6121     return false;
6122   case 0xFF:       MaskByteOffset = 0; break;
6123   case 0xFF00:     MaskByteOffset = 1; break;
6124   case 0xFFFF:
6125     // In case demanded bits didn't clear the bits that will be shifted out.
6126     // This is needed for X86.
6127     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6128       MaskByteOffset = 1;
6129       break;
6130     }
6131     return false;
6132   case 0xFF0000:   MaskByteOffset = 2; break;
6133   case 0xFF000000: MaskByteOffset = 3; break;
6134   }
6135
6136   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6137   if (Opc == ISD::AND) {
6138     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6139       // (x >> 8) & 0xff
6140       // (x >> 8) & 0xff0000
6141       if (Opc0 != ISD::SRL)
6142         return false;
6143       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6144       if (!C || C->getZExtValue() != 8)
6145         return false;
6146     } else {
6147       // (x << 8) & 0xff00
6148       // (x << 8) & 0xff000000
6149       if (Opc0 != ISD::SHL)
6150         return false;
6151       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6152       if (!C || C->getZExtValue() != 8)
6153         return false;
6154     }
6155   } else if (Opc == ISD::SHL) {
6156     // (x & 0xff) << 8
6157     // (x & 0xff0000) << 8
6158     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6159       return false;
6160     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6161     if (!C || C->getZExtValue() != 8)
6162       return false;
6163   } else { // Opc == ISD::SRL
6164     // (x & 0xff00) >> 8
6165     // (x & 0xff000000) >> 8
6166     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6167       return false;
6168     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6169     if (!C || C->getZExtValue() != 8)
6170       return false;
6171   }
6172
6173   if (Parts[MaskByteOffset])
6174     return false;
6175
6176   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6177   return true;
6178 }
6179
6180 // Match 2 elements of a packed halfword bswap.
6181 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6182   if (N.getOpcode() == ISD::OR)
6183     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6184            isBSwapHWordElement(N.getOperand(1), Parts);
6185
6186   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6187     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6188     if (!C || C->getAPIntValue() != 16)
6189       return false;
6190     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6191     return true;
6192   }
6193
6194   return false;
6195 }
6196
6197 // Match this pattern:
6198 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6199 // And rewrite this to:
6200 //   (rotr (bswap A), 16)
6201 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6202                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6203                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6204   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6205          "MatchBSwapHWordOrAndAnd: expecting i32");
6206   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6207     return SDValue();
6208   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6209     return SDValue();
6210   // TODO: this is too restrictive; lifting this restriction requires more tests
6211   if (!N0->hasOneUse() || !N1->hasOneUse())
6212     return SDValue();
6213   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6214   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6215   if (!Mask0 || !Mask1)
6216     return SDValue();
6217   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6218       Mask1->getAPIntValue() != 0x00ff00ff)
6219     return SDValue();
6220   SDValue Shift0 = N0.getOperand(0);
6221   SDValue Shift1 = N1.getOperand(0);
6222   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6223     return SDValue();
6224   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6225   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6226   if (!ShiftAmt0 || !ShiftAmt1)
6227     return SDValue();
6228   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6229     return SDValue();
6230   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6231     return SDValue();
6232
6233   SDLoc DL(N);
6234   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6235   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6236   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6237 }
6238
6239 /// Match a 32-bit packed halfword bswap. That is
6240 /// ((x & 0x000000ff) << 8) |
6241 /// ((x & 0x0000ff00) >> 8) |
6242 /// ((x & 0x00ff0000) << 8) |
6243 /// ((x & 0xff000000) >> 8)
6244 /// => (rotl (bswap x), 16)
6245 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6246   if (!LegalOperations)
6247     return SDValue();
6248
6249   EVT VT = N->getValueType(0);
6250   if (VT != MVT::i32)
6251     return SDValue();
6252   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6253     return SDValue();
6254
6255   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6256                                               getShiftAmountTy(VT)))
6257   return BSwap;
6258
6259   // Try again with commuted operands.
6260   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6261                                               getShiftAmountTy(VT)))
6262   return BSwap;
6263
6264
6265   // Look for either
6266   // (or (bswaphpair), (bswaphpair))
6267   // (or (or (bswaphpair), (and)), (and))
6268   // (or (or (and), (bswaphpair)), (and))
6269   SDNode *Parts[4] = {};
6270
6271   if (isBSwapHWordPair(N0, Parts)) {
6272     // (or (or (and), (and)), (or (and), (and)))
6273     if (!isBSwapHWordPair(N1, Parts))
6274       return SDValue();
6275   } else if (N0.getOpcode() == ISD::OR) {
6276     // (or (or (or (and), (and)), (and)), (and))
6277     if (!isBSwapHWordElement(N1, Parts))
6278       return SDValue();
6279     SDValue N00 = N0.getOperand(0);
6280     SDValue N01 = N0.getOperand(1);
6281     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6282         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6283       return SDValue();
6284   } else
6285     return SDValue();
6286
6287   // Make sure the parts are all coming from the same node.
6288   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6289     return SDValue();
6290
6291   SDLoc DL(N);
6292   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6293                               SDValue(Parts[0], 0));
6294
6295   // Result of the bswap should be rotated by 16. If it's not legal, then
6296   // do  (x << 16) | (x >> 16).
6297   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6298   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6299     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6300   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6301     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6302   return DAG.getNode(ISD::OR, DL, VT,
6303                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6304                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6305 }
6306
6307 /// This contains all DAGCombine rules which reduce two values combined by
6308 /// an Or operation to a single value \see visitANDLike().
6309 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6310   EVT VT = N1.getValueType();
6311   SDLoc DL(N);
6312
6313   // fold (or x, undef) -> -1
6314   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6315     return DAG.getAllOnesConstant(DL, VT);
6316
6317   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6318     return V;
6319
6320   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6321   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6322       // Don't increase # computations.
6323       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6324     // We can only do this xform if we know that bits from X that are set in C2
6325     // but not in C1 are already zero.  Likewise for Y.
6326     if (const ConstantSDNode *N0O1C =
6327         getAsNonOpaqueConstant(N0.getOperand(1))) {
6328       if (const ConstantSDNode *N1O1C =
6329           getAsNonOpaqueConstant(N1.getOperand(1))) {
6330         // We can only do this xform if we know that bits from X that are set in
6331         // C2 but not in C1 are already zero.  Likewise for Y.
6332         const APInt &LHSMask = N0O1C->getAPIntValue();
6333         const APInt &RHSMask = N1O1C->getAPIntValue();
6334
6335         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6336             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6337           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6338                                   N0.getOperand(0), N1.getOperand(0));
6339           return DAG.getNode(ISD::AND, DL, VT, X,
6340                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6341         }
6342       }
6343     }
6344   }
6345
6346   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6347   if (N0.getOpcode() == ISD::AND &&
6348       N1.getOpcode() == ISD::AND &&
6349       N0.getOperand(0) == N1.getOperand(0) &&
6350       // Don't increase # computations.
6351       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6352     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6353                             N0.getOperand(1), N1.getOperand(1));
6354     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6355   }
6356
6357   return SDValue();
6358 }
6359
6360 /// OR combines for which the commuted variant will be tried as well.
6361 static SDValue visitORCommutative(
6362     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6363   EVT VT = N0.getValueType();
6364   if (N0.getOpcode() == ISD::AND) {
6365     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6366     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6367       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6368
6369     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6370     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6371       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6372   }
6373
6374   return SDValue();
6375 }
6376
6377 SDValue DAGCombiner::visitOR(SDNode *N) {
6378   SDValue N0 = N->getOperand(0);
6379   SDValue N1 = N->getOperand(1);
6380   EVT VT = N1.getValueType();
6381
6382   // x | x --> x
6383   if (N0 == N1)
6384     return N0;
6385
6386   // fold vector ops
6387   if (VT.isVector()) {
6388     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6389       return FoldedVOp;
6390
6391     // fold (or x, 0) -> x, vector edition
6392     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6393       return N1;
6394     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6395       return N0;
6396
6397     // fold (or x, -1) -> -1, vector edition
6398     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6399       // do not return N0, because undef node may exist in N0
6400       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6401     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6402       // do not return N1, because undef node may exist in N1
6403       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6404
6405     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6406     // Do this only if the resulting shuffle is legal.
6407     if (isa<ShuffleVectorSDNode>(N0) &&
6408         isa<ShuffleVectorSDNode>(N1) &&
6409         // Avoid folding a node with illegal type.
6410         TLI.isTypeLegal(VT)) {
6411       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6412       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6413       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6414       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6415       // Ensure both shuffles have a zero input.
6416       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6417         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6418         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6419         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6420         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6421         bool CanFold = true;
6422         int NumElts = VT.getVectorNumElements();
6423         SmallVector<int, 4> Mask(NumElts);
6424
6425         for (int i = 0; i != NumElts; ++i) {
6426           int M0 = SV0->getMaskElt(i);
6427           int M1 = SV1->getMaskElt(i);
6428
6429           // Determine if either index is pointing to a zero vector.
6430           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6431           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6432
6433           // If one element is zero and the otherside is undef, keep undef.
6434           // This also handles the case that both are undef.
6435           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6436             Mask[i] = -1;
6437             continue;
6438           }
6439
6440           // Make sure only one of the elements is zero.
6441           if (M0Zero == M1Zero) {
6442             CanFold = false;
6443             break;
6444           }
6445
6446           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6447
6448           // We have a zero and non-zero element. If the non-zero came from
6449           // SV0 make the index a LHS index. If it came from SV1, make it
6450           // a RHS index. We need to mod by NumElts because we don't care
6451           // which operand it came from in the original shuffles.
6452           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6453         }
6454
6455         if (CanFold) {
6456           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6457           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6458
6459           SDValue LegalShuffle =
6460               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6461                                           Mask, DAG);
6462           if (LegalShuffle)
6463             return LegalShuffle;
6464         }
6465       }
6466     }
6467   }
6468
6469   // fold (or c1, c2) -> c1|c2
6470   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6471   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6472     return C;
6473
6474   // canonicalize constant to RHS
6475   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6476      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6477     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6478
6479   // fold (or x, 0) -> x
6480   if (isNullConstant(N1))
6481     return N0;
6482
6483   // fold (or x, -1) -> -1
6484   if (isAllOnesConstant(N1))
6485     return N1;
6486
6487   if (SDValue NewSel = foldBinOpIntoSelect(N))
6488     return NewSel;
6489
6490   // fold (or x, c) -> c iff (x & ~c) == 0
6491   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6492     return N1;
6493
6494   if (SDValue Combined = visitORLike(N0, N1, N))
6495     return Combined;
6496
6497   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6498     return Combined;
6499
6500   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6501   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6502     return BSwap;
6503   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6504     return BSwap;
6505
6506   // reassociate or
6507   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6508     return ROR;
6509
6510   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6511   // iff (c1 & c2) != 0 or c1/c2 are undef.
6512   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6513     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6514   };
6515   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6516       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6517     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6518                                                  {N1, N0.getOperand(1)})) {
6519       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6520       AddToWorklist(IOR.getNode());
6521       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6522     }
6523   }
6524
6525   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6526     return Combined;
6527   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6528     return Combined;
6529
6530   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6531   if (N0.getOpcode() == N1.getOpcode())
6532     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6533       return V;
6534
6535   // See if this is some rotate idiom.
6536   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6537     return Rot;
6538
6539   if (SDValue Load = MatchLoadCombine(N))
6540     return Load;
6541
6542   // Simplify the operands using demanded-bits information.
6543   if (SimplifyDemandedBits(SDValue(N, 0)))
6544     return SDValue(N, 0);
6545
6546   // If OR can be rewritten into ADD, try combines based on ADD.
6547   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6548       DAG.haveNoCommonBitsSet(N0, N1))
6549     if (SDValue Combined = visitADDLike(N))
6550       return Combined;
6551
6552   return SDValue();
6553 }
6554
6555 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6556   if (Op.getOpcode() == ISD::AND &&
6557       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6558     Mask = Op.getOperand(1);
6559     return Op.getOperand(0);
6560   }
6561   return Op;
6562 }
6563
6564 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6565 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6566                             SDValue &Mask) {
6567   Op = stripConstantMask(DAG, Op, Mask);
6568   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6569     Shift = Op;
6570     return true;
6571   }
6572   return false;
6573 }
6574
6575 /// Helper function for visitOR to extract the needed side of a rotate idiom
6576 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6577 /// InstCombine merged some outside op with one of the shifts from
6578 /// the rotate pattern.
6579 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6580 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6581 /// patterns:
6582 ///
6583 ///   (or (add v v) (shrl v bitwidth-1)):
6584 ///     expands (add v v) -> (shl v 1)
6585 ///
6586 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6587 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6588 ///
6589 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6590 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6591 ///
6592 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6593 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6594 ///
6595 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6596 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6597 ///
6598 /// Such that in all cases, c3+c2==bitwidth(op v c1).
6599 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6600                                      SDValue ExtractFrom, SDValue &Mask,
6601                                      const SDLoc &DL) {
6602   assert(OppShift && ExtractFrom && "Empty SDValue");
6603   assert(
6604       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6605       "Existing shift must be valid as a rotate half");
6606
6607   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6608
6609   // Value and Type of the shift.
6610   SDValue OppShiftLHS = OppShift.getOperand(0);
6611   EVT ShiftedVT = OppShiftLHS.getValueType();
6612
6613   // Amount of the existing shift.
6614   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6615
6616   // (add v v) -> (shl v 1)
6617   // TODO: Should this be a general DAG canonicalization?
6618   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6619       ExtractFrom.getOpcode() == ISD::ADD &&
6620       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6621       ExtractFrom.getOperand(0) == OppShiftLHS &&
6622       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6623     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6624                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6625
6626   // Preconditions:
6627   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6628   //
6629   // Find opcode of the needed shift to be extracted from (op0 v c0).
6630   unsigned Opcode = ISD::DELETED_NODE;
6631   bool IsMulOrDiv = false;
6632   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6633   // opcode or its arithmetic (mul or udiv) variant.
6634   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6635     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6636     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6637       return false;
6638     Opcode = NeededShift;
6639     return true;
6640   };
6641   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6642   // that the needed shift can be extracted from.
6643   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6644       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6645     return SDValue();
6646
6647   // op0 must be the same opcode on both sides, have the same LHS argument,
6648   // and produce the same value type.
6649   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6650       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6651       ShiftedVT != ExtractFrom.getValueType())
6652     return SDValue();
6653
6654   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6655   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6656   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6657   ConstantSDNode *ExtractFromCst =
6658       isConstOrConstSplat(ExtractFrom.getOperand(1));
6659   // TODO: We should be able to handle non-uniform constant vectors for these values
6660   // Check that we have constant values.
6661   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6662       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6663       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6664     return SDValue();
6665
6666   // Compute the shift amount we need to extract to complete the rotate.
6667   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6668   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6669     return SDValue();
6670   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6671   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6672   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6673   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6674   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6675
6676   // Now try extract the needed shift from the ExtractFrom op and see if the
6677   // result matches up with the existing shift's LHS op.
6678   if (IsMulOrDiv) {
6679     // Op to extract from is a mul or udiv by a constant.
6680     // Check:
6681     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6682     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6683     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6684                                                  NeededShiftAmt.getZExtValue());
6685     APInt ResultAmt;
6686     APInt Rem;
6687     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6688     if (Rem != 0 || ResultAmt != OppLHSAmt)
6689       return SDValue();
6690   } else {
6691     // Op to extract from is a shift by a constant.
6692     // Check:
6693     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6694     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6695                                           ExtractFromAmt.getBitWidth()))
6696       return SDValue();
6697   }
6698
6699   // Return the expanded shift op that should allow a rotate to be formed.
6700   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6701   EVT ResVT = ExtractFrom.getValueType();
6702   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6703   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6704 }
6705
6706 // Return true if we can prove that, whenever Neg and Pos are both in the
6707 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6708 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6709 //
6710 //     (or (shift1 X, Neg), (shift2 X, Pos))
6711 //
6712 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6713 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6714 // to consider shift amounts with defined behavior.
6715 //
6716 // The IsRotate flag should be set when the LHS of both shifts is the same.
6717 // Otherwise if matching a general funnel shift, it should be clear.
6718 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6719                            SelectionDAG &DAG, bool IsRotate) {
6720   // If EltSize is a power of 2 then:
6721   //
6722   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6723   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6724   //
6725   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6726   // for the stronger condition:
6727   //
6728   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6729   //
6730   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6731   // we can just replace Neg with Neg' for the rest of the function.
6732   //
6733   // In other cases we check for the even stronger condition:
6734   //
6735   //     Neg == EltSize - Pos                                    [B]
6736   //
6737   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6738   // behavior if Pos == 0 (and consequently Neg == EltSize).
6739   //
6740   // We could actually use [A] whenever EltSize is a power of 2, but the
6741   // only extra cases that it would match are those uninteresting ones
6742   // where Neg and Pos are never in range at the same time.  E.g. for
6743   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6744   // as well as (sub 32, Pos), but:
6745   //
6746   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6747   //
6748   // always invokes undefined behavior for 32-bit X.
6749   //
6750   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6751   //
6752   // NOTE: We can only do this when matching an AND and not a general
6753   // funnel shift.
6754   unsigned MaskLoBits = 0;
6755   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6756     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6757       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6758       unsigned Bits = Log2_64(EltSize);
6759       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6760           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6761         Neg = Neg.getOperand(0);
6762         MaskLoBits = Bits;
6763       }
6764     }
6765   }
6766
6767   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6768   if (Neg.getOpcode() != ISD::SUB)
6769     return false;
6770   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6771   if (!NegC)
6772     return false;
6773   SDValue NegOp1 = Neg.getOperand(1);
6774
6775   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6776   // Pos'.  The truncation is redundant for the purpose of the equality.
6777   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6778     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6779       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6780       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6781           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6782            MaskLoBits))
6783         Pos = Pos.getOperand(0);
6784     }
6785   }
6786
6787   // The condition we need is now:
6788   //
6789   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6790   //
6791   // If NegOp1 == Pos then we need:
6792   //
6793   //              EltSize & Mask == NegC & Mask
6794   //
6795   // (because "x & Mask" is a truncation and distributes through subtraction).
6796   //
6797   // We also need to account for a potential truncation of NegOp1 if the amount
6798   // has already been legalized to a shift amount type.
6799   APInt Width;
6800   if ((Pos == NegOp1) ||
6801       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6802     Width = NegC->getAPIntValue();
6803
6804   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6805   // Then the condition we want to prove becomes:
6806   //
6807   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6808   //
6809   // which, again because "x & Mask" is a truncation, becomes:
6810   //
6811   //                NegC & Mask == (EltSize - PosC) & Mask
6812   //             EltSize & Mask == (NegC + PosC) & Mask
6813   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6814     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6815       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6816     else
6817       return false;
6818   } else
6819     return false;
6820
6821   // Now we just need to check that EltSize & Mask == Width & Mask.
6822   if (MaskLoBits)
6823     // EltSize & Mask is 0 since Mask is EltSize - 1.
6824     return Width.getLoBits(MaskLoBits) == 0;
6825   return Width == EltSize;
6826 }
6827
6828 // A subroutine of MatchRotate used once we have found an OR of two opposite
6829 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6830 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6831 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6832 // Neg with outer conversions stripped away.
6833 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6834                                        SDValue Neg, SDValue InnerPos,
6835                                        SDValue InnerNeg, unsigned PosOpcode,
6836                                        unsigned NegOpcode, const SDLoc &DL) {
6837   // fold (or (shl x, (*ext y)),
6838   //          (srl x, (*ext (sub 32, y)))) ->
6839   //   (rotl x, y) or (rotr x, (sub 32, y))
6840   //
6841   // fold (or (shl x, (*ext (sub 32, y))),
6842   //          (srl x, (*ext y))) ->
6843   //   (rotr x, y) or (rotl x, (sub 32, y))
6844   EVT VT = Shifted.getValueType();
6845   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6846                      /*IsRotate*/ true)) {
6847     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6848     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6849                        HasPos ? Pos : Neg);
6850   }
6851
6852   return SDValue();
6853 }
6854
6855 // A subroutine of MatchRotate used once we have found an OR of two opposite
6856 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
6857 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6858 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6859 // Neg with outer conversions stripped away.
6860 // TODO: Merge with MatchRotatePosNeg.
6861 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6862                                        SDValue Neg, SDValue InnerPos,
6863                                        SDValue InnerNeg, unsigned PosOpcode,
6864                                        unsigned NegOpcode, const SDLoc &DL) {
6865   EVT VT = N0.getValueType();
6866   unsigned EltBits = VT.getScalarSizeInBits();
6867
6868   // fold (or (shl x0, (*ext y)),
6869   //          (srl x1, (*ext (sub 32, y)))) ->
6870   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6871   //
6872   // fold (or (shl x0, (*ext (sub 32, y))),
6873   //          (srl x1, (*ext y))) ->
6874   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6875   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6876     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6877     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6878                        HasPos ? Pos : Neg);
6879   }
6880
6881   // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6882   // so for now just use the PosOpcode case if its legal.
6883   // TODO: When can we use the NegOpcode case?
6884   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6885     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6886       if (Op.getOpcode() != BinOpc)
6887         return false;
6888       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6889       return Cst && (Cst->getAPIntValue() == Imm);
6890     };
6891
6892     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6893     //   -> (fshl x0, x1, y)
6894     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6895         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6896         InnerPos == InnerNeg.getOperand(0) &&
6897         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6898       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6899     }
6900
6901     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6902     //   -> (fshr x0, x1, y)
6903     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6904         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6905         InnerNeg == InnerPos.getOperand(0) &&
6906         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6907       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6908     }
6909
6910     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6911     //   -> (fshr x0, x1, y)
6912     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6913     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6914         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6915         InnerNeg == InnerPos.getOperand(0) &&
6916         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6917       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6918     }
6919   }
6920
6921   return SDValue();
6922 }
6923
6924 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6925 // idioms for rotate, and if the target supports rotation instructions, generate
6926 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6927 // with different shifted sources.
6928 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6929   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6930   EVT VT = LHS.getValueType();
6931   if (!TLI.isTypeLegal(VT))
6932     return SDValue();
6933
6934   // The target must have at least one rotate/funnel flavor.
6935   bool HasROTL = hasOperation(ISD::ROTL, VT);
6936   bool HasROTR = hasOperation(ISD::ROTR, VT);
6937   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6938   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6939   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6940     return SDValue();
6941
6942   // Check for truncated rotate.
6943   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6944       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6945     assert(LHS.getValueType() == RHS.getValueType());
6946     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6947       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6948     }
6949   }
6950
6951   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6952   SDValue LHSShift;   // The shift.
6953   SDValue LHSMask;    // AND value if any.
6954   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6955
6956   SDValue RHSShift;   // The shift.
6957   SDValue RHSMask;    // AND value if any.
6958   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6959
6960   // If neither side matched a rotate half, bail
6961   if (!LHSShift && !RHSShift)
6962     return SDValue();
6963
6964   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6965   // side of the rotate, so try to handle that here. In all cases we need to
6966   // pass the matched shift from the opposite side to compute the opcode and
6967   // needed shift amount to extract.  We still want to do this if both sides
6968   // matched a rotate half because one half may be a potential overshift that
6969   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6970   // single one).
6971
6972   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6973   if (LHSShift)
6974     if (SDValue NewRHSShift =
6975             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6976       RHSShift = NewRHSShift;
6977   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6978   if (RHSShift)
6979     if (SDValue NewLHSShift =
6980             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6981       LHSShift = NewLHSShift;
6982
6983   // If a side is still missing, nothing else we can do.
6984   if (!RHSShift || !LHSShift)
6985     return SDValue();
6986
6987   // At this point we've matched or extracted a shift op on each side.
6988
6989   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6990     return SDValue(); // Shifts must disagree.
6991
6992   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6993   if (!IsRotate && !(HasFSHL || HasFSHR))
6994     return SDValue(); // Requires funnel shift support.
6995
6996   // Canonicalize shl to left side in a shl/srl pair.
6997   if (RHSShift.getOpcode() == ISD::SHL) {
6998     std::swap(LHS, RHS);
6999     std::swap(LHSShift, RHSShift);
7000     std::swap(LHSMask, RHSMask);
7001   }
7002
7003   unsigned EltSizeInBits = VT.getScalarSizeInBits();
7004   SDValue LHSShiftArg = LHSShift.getOperand(0);
7005   SDValue LHSShiftAmt = LHSShift.getOperand(1);
7006   SDValue RHSShiftArg = RHSShift.getOperand(0);
7007   SDValue RHSShiftAmt = RHSShift.getOperand(1);
7008
7009   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7010   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7011   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7012   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7013   // iff C1+C2 == EltSizeInBits
7014   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7015                                         ConstantSDNode *RHS) {
7016     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7017   };
7018   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7019     SDValue Res;
7020     if (IsRotate && (HasROTL || HasROTR))
7021       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7022                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
7023     else
7024       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7025                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7026
7027     // If there is an AND of either shifted operand, apply it to the result.
7028     if (LHSMask.getNode() || RHSMask.getNode()) {
7029       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7030       SDValue Mask = AllOnes;
7031
7032       if (LHSMask.getNode()) {
7033         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7034         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7035                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7036       }
7037       if (RHSMask.getNode()) {
7038         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7039         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7040                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7041       }
7042
7043       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7044     }
7045
7046     return Res;
7047   }
7048
7049   // If there is a mask here, and we have a variable shift, we can't be sure
7050   // that we're masking out the right stuff.
7051   if (LHSMask.getNode() || RHSMask.getNode())
7052     return SDValue();
7053
7054   // If the shift amount is sign/zext/any-extended just peel it off.
7055   SDValue LExtOp0 = LHSShiftAmt;
7056   SDValue RExtOp0 = RHSShiftAmt;
7057   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7058        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7059        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7060        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7061       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7062        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7063        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7064        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7065     LExtOp0 = LHSShiftAmt.getOperand(0);
7066     RExtOp0 = RHSShiftAmt.getOperand(0);
7067   }
7068
7069   if (IsRotate && (HasROTL || HasROTR)) {
7070     SDValue TryL =
7071         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7072                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7073     if (TryL)
7074       return TryL;
7075
7076     SDValue TryR =
7077         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7078                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7079     if (TryR)
7080       return TryR;
7081   }
7082
7083   SDValue TryL =
7084       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7085                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7086   if (TryL)
7087     return TryL;
7088
7089   SDValue TryR =
7090       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7091                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7092   if (TryR)
7093     return TryR;
7094
7095   return SDValue();
7096 }
7097
7098 namespace {
7099
7100 /// Represents known origin of an individual byte in load combine pattern. The
7101 /// value of the byte is either constant zero or comes from memory.
7102 struct ByteProvider {
7103   // For constant zero providers Load is set to nullptr. For memory providers
7104   // Load represents the node which loads the byte from memory.
7105   // ByteOffset is the offset of the byte in the value produced by the load.
7106   LoadSDNode *Load = nullptr;
7107   unsigned ByteOffset = 0;
7108
7109   ByteProvider() = default;
7110
7111   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7112     return ByteProvider(Load, ByteOffset);
7113   }
7114
7115   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7116
7117   bool isConstantZero() const { return !Load; }
7118   bool isMemory() const { return Load; }
7119
7120   bool operator==(const ByteProvider &Other) const {
7121     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7122   }
7123
7124 private:
7125   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7126       : Load(Load), ByteOffset(ByteOffset) {}
7127 };
7128
7129 } // end anonymous namespace
7130
7131 /// Recursively traverses the expression calculating the origin of the requested
7132 /// byte of the given value. Returns None if the provider can't be calculated.
7133 ///
7134 /// For all the values except the root of the expression verifies that the value
7135 /// has exactly one use and if it's not true return None. This way if the origin
7136 /// of the byte is returned it's guaranteed that the values which contribute to
7137 /// the byte are not used outside of this expression.
7138 ///
7139 /// Because the parts of the expression are not allowed to have more than one
7140 /// use this function iterates over trees, not DAGs. So it never visits the same
7141 /// node more than once.
7142 static const Optional<ByteProvider>
7143 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7144                       bool Root = false) {
7145   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7146   if (Depth == 10)
7147     return None;
7148
7149   if (!Root && !Op.hasOneUse())
7150     return None;
7151
7152   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7153   unsigned BitWidth = Op.getValueSizeInBits();
7154   if (BitWidth % 8 != 0)
7155     return None;
7156   unsigned ByteWidth = BitWidth / 8;
7157   assert(Index < ByteWidth && "invalid index requested");
7158   (void) ByteWidth;
7159
7160   switch (Op.getOpcode()) {
7161   case ISD::OR: {
7162     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7163     if (!LHS)
7164       return None;
7165     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7166     if (!RHS)
7167       return None;
7168
7169     if (LHS->isConstantZero())
7170       return RHS;
7171     if (RHS->isConstantZero())
7172       return LHS;
7173     return None;
7174   }
7175   case ISD::SHL: {
7176     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7177     if (!ShiftOp)
7178       return None;
7179
7180     uint64_t BitShift = ShiftOp->getZExtValue();
7181     if (BitShift % 8 != 0)
7182       return None;
7183     uint64_t ByteShift = BitShift / 8;
7184
7185     return Index < ByteShift
7186                ? ByteProvider::getConstantZero()
7187                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7188                                        Depth + 1);
7189   }
7190   case ISD::ANY_EXTEND:
7191   case ISD::SIGN_EXTEND:
7192   case ISD::ZERO_EXTEND: {
7193     SDValue NarrowOp = Op->getOperand(0);
7194     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7195     if (NarrowBitWidth % 8 != 0)
7196       return None;
7197     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7198
7199     if (Index >= NarrowByteWidth)
7200       return Op.getOpcode() == ISD::ZERO_EXTEND
7201                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7202                  : None;
7203     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7204   }
7205   case ISD::BSWAP:
7206     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7207                                  Depth + 1);
7208   case ISD::LOAD: {
7209     auto L = cast<LoadSDNode>(Op.getNode());
7210     if (!L->isSimple() || L->isIndexed())
7211       return None;
7212
7213     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7214     if (NarrowBitWidth % 8 != 0)
7215       return None;
7216     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7217
7218     if (Index >= NarrowByteWidth)
7219       return L->getExtensionType() == ISD::ZEXTLOAD
7220                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7221                  : None;
7222     return ByteProvider::getMemory(L, Index);
7223   }
7224   }
7225
7226   return None;
7227 }
7228
// Position expected for byte number i when the bytes are laid out in
// little-endian order: the index itself.  BW is not used in the computation.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  const unsigned Position = i;
  return Position;
}
7232
// Position expected for byte number i when BW bytes are laid out in
// big-endian order: the index mirrored within [0, BW).
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastByte = BW - 1;
  return LastByte - i;
}
7236
7237 // Check if the bytes offsets we are looking at match with either big or
7238 // little endian value loaded. Return true for big endian, false for little
7239 // endian, and None if match failed.
7240 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7241                                   int64_t FirstOffset) {
7242   // The endian can be decided only when it is 2 bytes at least.
7243   unsigned Width = ByteOffsets.size();
7244   if (Width < 2)
7245     return None;
7246
7247   bool BigEndian = true, LittleEndian = true;
7248   for (unsigned i = 0; i < Width; i++) {
7249     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7250     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7251     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7252     if (!BigEndian && !LittleEndian)
7253       return None;
7254   }
7255
7256   assert((BigEndian != LittleEndian) && "It should be either big endian or"
7257                                         "little endian");
7258   return BigEndian;
7259 }
7260
7261 static SDValue stripTruncAndExt(SDValue Value) {
7262   switch (Value.getOpcode()) {
7263   case ISD::TRUNCATE:
7264   case ISD::ZERO_EXTEND:
7265   case ISD::SIGN_EXTEND:
7266   case ISD::ANY_EXTEND:
7267     return stripTruncAndExt(Value.getOperand(0));
7268   }
7269   return Value;
7270 }
7271
7272 /// Match a pattern where a wide type scalar value is stored by several narrow
7273 /// stores. Fold it into a single store or a BSWAP and a store if the targets
7274 /// supports it.
7275 ///
7276 /// Assuming little endian target:
7277 ///  i8 *p = ...
7278 ///  i32 val = ...
7279 ///  p[0] = (val >> 0) & 0xFF;
7280 ///  p[1] = (val >> 8) & 0xFF;
7281 ///  p[2] = (val >> 16) & 0xFF;
7282 ///  p[3] = (val >> 24) & 0xFF;
7283 /// =>
7284 ///  *((i32)p) = val;
7285 ///
7286 ///  i8 *p = ...
7287 ///  i32 val = ...
7288 ///  p[0] = (val >> 24) & 0xFF;
7289 ///  p[1] = (val >> 16) & 0xFF;
7290 ///  p[2] = (val >> 8) & 0xFF;
7291 ///  p[3] = (val >> 0) & 0xFF;
7292 /// =>
7293 ///  *((i32)p) = BSWAP(val);
7294 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7295   // The matching looks for "store (trunc x)" patterns that appear early but are
7296   // likely to be replaced by truncating store nodes during combining.
7297   // TODO: If there is evidence that running this later would help, this
7298   //       limitation could be removed. Legality checks may need to be added
7299   //       for the created store and optional bswap/rotate.
7300   if (LegalOperations)
7301     return SDValue();
7302
7303   // We only handle merging simple stores of 1-4 bytes.
7304   // TODO: Allow unordered atomics when wider type is legal (see D66309)
7305   EVT MemVT = N->getMemoryVT();
7306   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7307       !N->isSimple() || N->isIndexed())
7308     return SDValue();
7309
7310   // Collect all of the stores in the chain.
7311   SDValue Chain = N->getChain();
7312   SmallVector<StoreSDNode *, 8> Stores = {N};
7313   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7314     // All stores must be the same size to ensure that we are writing all of the
7315     // bytes in the wide value.
7316     // TODO: We could allow multiple sizes by tracking each stored byte.
7317     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7318         Store->isIndexed())
7319       return SDValue();
7320     Stores.push_back(Store);
7321     Chain = Store->getChain();
7322   }
7323   // There is no reason to continue if we do not have at least a pair of stores.
7324   if (Stores.size() < 2)
7325     return SDValue();
7326
7327   // Handle simple types only.
7328   LLVMContext &Context = *DAG.getContext();
7329   unsigned NumStores = Stores.size();
7330   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7331   unsigned WideNumBits = NumStores * NarrowNumBits;
7332   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7333   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7334     return SDValue();
7335
7336   // Check if all bytes of the source value that we are looking at are stored
7337   // to the same base address. Collect offsets from Base address into OffsetMap.
7338   SDValue SourceValue;
7339   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7340   int64_t FirstOffset = INT64_MAX;
7341   StoreSDNode *FirstStore = nullptr;
7342   Optional<BaseIndexOffset> Base;
7343   for (auto Store : Stores) {
7344     // All the stores store different parts of the CombinedValue. A truncate is
7345     // required to get the partial value.
7346     SDValue Trunc = Store->getValue();
7347     if (Trunc.getOpcode() != ISD::TRUNCATE)
7348       return SDValue();
7349     // Other than the first/last part, a shift operation is required to get the
7350     // offset.
7351     int64_t Offset = 0;
7352     SDValue WideVal = Trunc.getOperand(0);
7353     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7354         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7355       // The shift amount must be a constant multiple of the narrow type.
7356       // It is translated to the offset address in the wide source value "y".
7357       //
7358       // x = srl y, ShiftAmtC
7359       // i8 z = trunc x
7360       // store z, ...
7361       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7362       if (ShiftAmtC % NarrowNumBits != 0)
7363         return SDValue();
7364
7365       Offset = ShiftAmtC / NarrowNumBits;
7366       WideVal = WideVal.getOperand(0);
7367     }
7368
7369     // Stores must share the same source value with different offsets.
7370     // Truncate and extends should be stripped to get the single source value.
7371     if (!SourceValue)
7372       SourceValue = WideVal;
7373     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7374       return SDValue();
7375     else if (SourceValue.getValueType() != WideVT) {
7376       if (WideVal.getValueType() == WideVT ||
7377           WideVal.getScalarValueSizeInBits() >
7378               SourceValue.getScalarValueSizeInBits())
7379         SourceValue = WideVal;
7380       // Give up if the source value type is smaller than the store size.
7381       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7382         return SDValue();
7383     }
7384
7385     // Stores must share the same base address.
7386     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7387     int64_t ByteOffsetFromBase = 0;
7388     if (!Base)
7389       Base = Ptr;
7390     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7391       return SDValue();
7392
7393     // Remember the first store.
7394     if (ByteOffsetFromBase < FirstOffset) {
7395       FirstStore = Store;
7396       FirstOffset = ByteOffsetFromBase;
7397     }
7398     // Map the offset in the store and the offset in the combined value, and
7399     // early return if it has been set before.
7400     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7401       return SDValue();
7402     OffsetMap[Offset] = ByteOffsetFromBase;
7403   }
7404
7405   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7406   assert(FirstStore && "First store must be set");
7407
7408   // Check that a store of the wide type is both allowed and fast on the target
7409   const DataLayout &Layout = DAG.getDataLayout();
7410   bool Fast = false;
7411   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7412                                         *FirstStore->getMemOperand(), &Fast);
7413   if (!Allowed || !Fast)
7414     return SDValue();
7415
7416   // Check if the pieces of the value are going to the expected places in memory
7417   // to merge the stores.
7418   auto checkOffsets = [&](bool MatchLittleEndian) {
7419     if (MatchLittleEndian) {
7420       for (unsigned i = 0; i != NumStores; ++i)
7421         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7422           return false;
7423     } else { // MatchBigEndian by reversing loop counter.
7424       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7425         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7426           return false;
7427     }
7428     return true;
7429   };
7430
7431   // Check if the offsets line up for the native data layout of this target.
7432   bool NeedBswap = false;
7433   bool NeedRotate = false;
7434   if (!checkOffsets(Layout.isLittleEndian())) {
7435     // Special-case: check if byte offsets line up for the opposite endian.
7436     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7437       NeedBswap = true;
7438     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7439       NeedRotate = true;
7440     else
7441       return SDValue();
7442   }
7443
7444   SDLoc DL(N);
7445   if (WideVT != SourceValue.getValueType()) {
7446     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7447            "Unexpected store value to merge");
7448     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7449   }
7450
7451   // Before legalize we can introduce illegal bswaps/rotates which will be later
7452   // converted to an explicit bswap sequence. This way we end up with a single
7453   // store and byte shuffling instead of several stores and byte shuffling.
7454   if (NeedBswap) {
7455     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7456   } else if (NeedRotate) {
7457     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7458     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7459     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7460   }
7461
7462   SDValue NewStore =
7463       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7464                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7465
7466   // Rely on other DAG combine rules to remove the other individual stores.
7467   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7468   return NewStore;
7469 }
7470
7471 /// Match a pattern where a wide type scalar value is loaded by several narrow
7472 /// loads and combined by shifts and ors. Fold it into a single load or a load
7473 /// and a BSWAP if the target supports it.
7474 ///
7475 /// Assuming little endian target:
7476 ///  i8 *a = ...
7477 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7478 /// =>
7479 ///  i32 val = *((i32)a)
7480 ///
7481 ///  i8 *a = ...
7482 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7483 /// =>
7484 ///  i32 val = BSWAP(*((i32)a))
7485 ///
7486 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7487 /// interact well with the worklist mechanism. When a part of the pattern is
7488 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7489 /// but the root node of the pattern which triggers the load combine is not
7490 /// necessarily a direct user of the changed node. For example, once the address
7491 /// of t28 load is reassociated load combine won't be triggered:
7492 ///             t25: i32 = add t4, Constant:i32<2>
7493 ///           t26: i64 = sign_extend t25
7494 ///        t27: i64 = add t2, t26
7495 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7496 ///     t29: i32 = zero_extend t28
7497 ///   t32: i32 = shl t29, Constant:i8<8>
7498 /// t33: i32 = or t23, t32
7499 /// As a possible fix visitLoad can check if the load can be a part of a load
7500 /// combine pattern and add corresponding OR roots to the worklist.
7501 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
       // Returns the replacement value (a wide load, possibly followed by a
       // shift and a BSWAP) or an empty SDValue when the pattern rooted at N
       // cannot be combined.
7502   assert(N->getOpcode() == ISD::OR &&
7503          "Can only match load combining against OR nodes");
7504
7505   // Handles simple types only
7506   EVT VT = N->getValueType(0);
7507   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7508     return SDValue();
7509   unsigned ByteWidth = VT.getSizeInBits() / 8;
7510
7511   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
       // Converts a memory byte provider's ByteOffset into an offset within the
       // memory accessed by its load, picking the helper that matches the
       // target's endianness. (bigEndianByteAt / littleEndianByteAt are defined
       // elsewhere in this file.)
7512   auto MemoryByteOffset = [&] (ByteProvider P) {
7513     assert(P.isMemory() && "Must be a memory byte provider");
7514     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7515     assert(LoadBitWidth % 8 == 0 &&
7516            "can only analyze providers for individual bytes not bit");
7517     unsigned LoadByteWidth = LoadBitWidth / 8;
7518     return IsBigEndianTarget
7519             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7520             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7521   };
7522
7523   Optional<BaseIndexOffset> Base;
7524   SDValue Chain;
7525
7526   SmallPtrSet<LoadSDNode *, 8> Loads;
7527   Optional<ByteProvider> FirstByteProvider;
7528   int64_t FirstOffset = INT64_MAX;
7529
7530   // Check if all the bytes of the OR we are looking at are loaded from the same
7531   // base address. Collect byte offsets from the Base address in ByteOffsets.
       // Entries for zero bytes are never written; they keep their initial value
       // and are dropped again before the endianness check below.
7532   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7533   unsigned ZeroExtendedBytes = 0;
       // Walk the bytes of the result from most significant (ByteWidth - 1) down
       // to least significant (0).
7534   for (int i = ByteWidth - 1; i >= 0; --i) {
7535     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7536     if (!P)
7537       return SDValue();
7538
7539     if (P->isConstantZero()) {
7540       // It's OK for the N most significant bytes to be 0, we can just
7541       // zero-extend the load.
           // Because the loop runs from the top byte downwards, the count only
           // matches (ByteWidth - i) while every byte above i was zero as well;
           // a zero byte below a memory byte makes the pattern unusable.
7542       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7543         return SDValue();
7544       continue;
7545     }
7546     assert(P->isMemory() && "provenance should either be memory or zero");
7547
7548     LoadSDNode *L = P->Load;
7549     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7550            !L->isIndexed() &&
7551            "Must be enforced by calculateByteProvider");
7552     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7553
7554     // All loads must share the same chain
         // (the first load seen establishes the chain to compare against).
7555     SDValue LChain = L->getChain();
7556     if (!Chain)
7557       Chain = LChain;
7558     else if (Chain != LChain)
7559       return SDValue();
7560
7561     // Loads must share the same base address
         // The first load seen becomes Base (offset 0); for later loads,
         // equalBaseIndex is passed ByteOffsetFromBase by reference and is
         // presumably what fills in the distance from Base.
7562     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7563     int64_t ByteOffsetFromBase = 0;
7564     if (!Base)
7565       Base = Ptr;
7566     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7567       return SDValue();
7568
7569     // Calculate the offset of the current byte from the base address
7570     ByteOffsetFromBase += MemoryByteOffset(*P);
7571     ByteOffsets[i] = ByteOffsetFromBase;
7572
7573     // Remember the first byte load
         // i.e. the provider with the smallest offset from Base seen so far.
7574     if (ByteOffsetFromBase < FirstOffset) {
7575       FirstByteProvider = P;
7576       FirstOffset = ByteOffsetFromBase;
7577     }
7578
7579     Loads.insert(L);
7580   }
7581   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7582          "memory, so there must be at least one load which produces the value");
7583   assert(Base && "Base address of the accessed memory location must be set");
7584   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7585
7586   bool NeedsZext = ZeroExtendedBytes > 0;
7587
       // The memory type covers only the bytes that actually come from memory.
7588   EVT MemVT =
7589       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7590
7591   if (!MemVT.isSimple())
7592     return SDValue();
7593
7594   // Before legalize we can introduce too wide illegal loads which will be later
7595   // split into legal sized loads. This enables us to combine i64 load by i8
7596   // patterns to a couple of i32 loads on 32 bit targets.
7597   if (LegalOperations &&
7598       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7599                             MemVT))
7600     return SDValue();
7601
7602   // Check if the bytes of the OR we are looking at match with either big or
7603   // little endian value load
       // Only the offsets of the memory-provided (low) bytes are passed on; the
       // trailing ZeroExtendedBytes entries belong to the zero bytes.
7604   Optional<bool> IsBigEndian = isBigEndian(
7605       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7606   if (!IsBigEndian.hasValue())
7607     return SDValue();
7608
7609   assert(FirstByteProvider && "must be set");
7610
7611   // Ensure that the first byte is loaded from zero offset of the first load.
7612   // So the combined value can be loaded from the first load address.
7613   if (MemoryByteOffset(*FirstByteProvider) != 0)
7614     return SDValue();
7615   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7616
7617   // The node we are looking at matches with the pattern, check if we can
7618   // replace it with a single (possibly zero-extended) load and bswap + shift if
7619   // needed.
7620
7621   // If the load needs byte swap check if the target supports it
       // A swap is needed when the byte order of the matched pattern differs
       // from the target's byte order.
7622   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7623
7624   // Before legalize we can introduce illegal bswaps which will be later
7625   // converted to an explicit bswap sequence. This way we end up with a single
7626   // load and byte shuffling instead of several loads and byte shuffling.
7627   // We do not introduce illegal bswaps when zero-extending as this tends to
7628   // introduce too many arithmetic instructions.
7629   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7630       !TLI.isOperationLegal(ISD::BSWAP, VT))
7631     return SDValue();
7632
7633   // If we need to bswap and zero extend, we have to insert a shift. Check that
7634   // it is legal.
7635   if (NeedsBswap && NeedsZext && LegalOperations &&
7636       !TLI.isOperationLegal(ISD::SHL, VT))
7637     return SDValue();
7638
7639   // Check that a load of the wide type is both allowed and fast on the target
7640   bool Fast = false;
7641   bool Allowed =
7642       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7643                              *FirstLoad->getMemOperand(), &Fast);
7644   if (!Allowed || !Fast)
7645     return SDValue();
7646
       // The combined load reuses the address, pointer info and alignment of the
       // load that provides the lowest-addressed byte.
7647   SDValue NewLoad =
7648       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7649                      Chain, FirstLoad->getBasePtr(),
7650                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7651
7652   // Transfer chain users from old loads to the new load.
7653   for (LoadSDNode *L : Loads)
7654     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7655
7656   if (!NeedsBswap)
7657     return NewLoad;
7658
       // When zero-extending, the loaded bytes sit in the low part of the value.
       // Shift them up by the number of zero bytes first, so that the BSWAP
       // both reverses them and moves them back down to the low end.
7659   SDValue ShiftedLoad =
7660       NeedsZext
7661           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7662                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7663                                                    SDLoc(N), LegalOperations))
7664           : NewLoad;
7665   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7666 }
7667
7668 // Unfold a masked merge for targets that provide andn, bsl, or a comparable
7669 // bit-select instruction. The canonical form being matched is:
7670 //   |        A  |  |B|
7671 //   ((x ^ y) & m) ^ y
7672 //    |  D  |
7673 // and it is rewritten as:
7674 //   (x & m) | (y & ~m)
7675 // When y is a constant and 'andn' cannot take an immediate operand, the
7676 // result is instead:
7677 //   ~(~x & m) & (m | y)
7678 // NOTE: a pattern whose 'xor' is really a 'not' is left alone: unfolding it
7679 //       breaks (at least) andnpd / andnps patterns, and such patterns are
7680 //       simplified in IR and shouldn't be created in the DAG
7681 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7682   assert(N->getOpcode() == ISD::XOR);
7683
7684   SDValue Op0 = N->getOperand(0);
7685   SDValue Op1 = N->getOperand(1);
7686
7687   // A root of the form (xor ?, -1) is a 'not'; bail out.
7688   if (isAllOnesOrAllOnesSplat(Op1))
7689     return SDValue();
7690
7691   // Each of the three operators commutes, giving 8 operand arrangements of
7692   // the basic pattern. The helper below handles one placement of the inner
7693   // xor inside a candidate 'and' and accepts either order of its operands.
7694   SDValue X, Y, M;
7695   auto tryMatch = [&X, &Y, &M](SDValue MaskedOp, unsigned InnerIdx,
7696                                SDValue Outer) {
7697     if (MaskedOp.getOpcode() != ISD::AND || !MaskedOp.hasOneUse())
7698       return false;
7699     SDValue Inner = MaskedOp.getOperand(InnerIdx);
7700     if (Inner.getOpcode() != ISD::XOR || !Inner.hasOneUse())
7701       return false;
7702     SDValue A = Inner.getOperand(0);
7703     SDValue B = Inner.getOperand(1);
7704     // The inner xor must not be a 'not' either (i.e. y = -1).
7705     if (isAllOnesOrAllOnesSplat(B))
7706       return false;
7707     // Arrange for B to be the operand shared with the outer xor.
7708     if (A == Outer)
7709       std::swap(A, B);
7710     if (B != Outer)
7711       return false;
7712     X = A;
7713     Y = B;
7714     M = MaskedOp.getOperand(InnerIdx == 0 ? 1 : 0);
7715     return true;
7716   };
7717
7718   bool Matched = tryMatch(Op0, 0, Op1) || tryMatch(Op0, 1, Op1) ||
7719                  tryMatch(Op1, 0, Op0) || tryMatch(Op1, 1, Op0);
7720   if (!Matched)
7721     return SDValue();
7722
7723   // A constant mask is not expected here: InstCombine should have unfolded
7724   // that form already and DAGCombiner probably shouldn't produce it either.
7725   // Likewise give up when the target has no AndNot for the mask.
7726   if (isa<ConstantSDNode>(M.getNode()) || !TLI.hasAndNot(M))
7727     return SDValue();
7728
7729   SDLoc DL(N);
7730   EVT VT = N->getValueType(0);
7731
7732   if (TLI.hasAndNot(Y)) {
7733     // Plain form: (x & m) | (y & ~m)
7734     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7735     SDValue NotM = DAG.getNOT(DL, M, VT);
7736     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7737     return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7738   }
7739
7740   // 'andn' can't be used on Y (e.g. Y is a constant and immediates aren't
7741   // supported), so build ~(~x & m) & (m | y) to keep using andn.
7742   assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7743   SDValue NotX = DAG.getNOT(DL, X, VT);
7744   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7745   SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7746   SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7747   return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7748 }
7749
7750 SDValue DAGCombiner::visitXOR(SDNode *N) {
       // Combine an XOR node. The folds below are tried in the order written.
       // Returns the replacement value, SDValue(N, 0) when the combine has
       // already been applied to N, or an empty SDValue when nothing matched.
7751   SDValue N0 = N->getOperand(0);
7752   SDValue N1 = N->getOperand(1);
7753   EVT VT = N0.getValueType();
7754
7755   // fold vector ops
7756   if (VT.isVector()) {
7757     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7758       return FoldedVOp;
7759
7760     // fold (xor x, 0) -> x, vector edition
7761     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7762       return N1;
7763     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7764       return N0;
7765   }
7766
7767   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7768   SDLoc DL(N);
7769   if (N0.isUndef() && N1.isUndef())
7770     return DAG.getConstant(0, DL, VT);
7771
7772   // fold (xor x, undef) -> undef
7773   if (N0.isUndef())
7774     return N0;
7775   if (N1.isUndef())
7776     return N1;
7777
7778   // fold (xor c1, c2) -> c1^c2
7779   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7780     return C;
7781
7782   // canonicalize constant to RHS
7783   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7784      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7785     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7786
7787   // fold (xor x, 0) -> x
7788   if (isNullConstant(N1))
7789     return N0;
7790
7791   if (SDValue NewSel = foldBinOpIntoSelect(N))
7792     return NewSel;
7793
7794   // reassociate xor
7795   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7796     return RXOR;
7797
7798   // fold !(x cc y) -> (x !cc y)
       // Applies when N1 is the target's "true" constant and N0 is equivalent to
       // a setcc; the comparison is rebuilt with the inverted condition code.
7799   unsigned N0Opcode = N0.getOpcode();
7800   SDValue LHS, RHS, CC;
7801   if (TLI.isConstTrueVal(N1.getNode()) &&
7802       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7803     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7804                                                LHS.getValueType());
7805     if (!LegalOperations ||
7806         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7807       switch (N0Opcode) {
7808       default:
7809         llvm_unreachable("Unhandled SetCC Equivalent!");
7810       case ISD::SETCC:
7811         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7812       case ISD::SELECT_CC:
7813         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7814                                N0.getOperand(3), NotCC);
7815       case ISD::STRICT_FSETCC:
7816       case ISD::STRICT_FSETCCS: {
7817         if (N0.hasOneUse()) {
7818           // FIXME Can we handle multiple uses? Could we token factor the chain
7819           // results from the new/old setcc?
               // The strict compare also produces a chain (value 1), so N is
               // replaced explicitly and users of the old chain are redirected
               // to the new node's chain.
7820           SDValue SetCC =
7821               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7822                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7823           CombineTo(N, SetCC);
7824           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7825           recursivelyDeleteUnusedNodes(N0.getNode());
7826           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7827         }
             // Multiple uses: leave the switch and keep trying the later folds.
7828         break;
7829       }
7830       }
7831     }
7832   }
7833
7834   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7835   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7836       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7837     SDValue V = N0.getOperand(0);
7838     SDLoc DL0(N0);
7839     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7840                     DAG.getConstant(1, DL0, V.getValueType()));
7841     AddToWorklist(V.getNode());
7842     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7843   }
7844
7845   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
       // The same code also handles (not (and x, y)) -> (or (not x), (not y)):
       // NewOpcode is the opposite of the inner logic opcode.
7846   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7847       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7848     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7849     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7850       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7851       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7852       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7853       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7854       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7855     }
7856   }
7857   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
       // As above, an inner AND is handled too and becomes an OR.
7858   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7859       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7860     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7861     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7862       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7863       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7864       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7865       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7866       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7867     }
7868   }
7869
7870   // fold (not (neg x)) -> (add X, -1)
7871   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7872   // Y is a constant or the subtract has a single use.
7873   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7874       isNullConstant(N0.getOperand(0))) {
7875     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7876                        DAG.getAllOnesConstant(DL, VT));
7877   }
7878
7879   // fold (not (add X, -1)) -> (neg X)
7880   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7881       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7882     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7883                        N0.getOperand(0));
7884   }
7885
7886   // fold (xor (and x, y), y) -> (and (not x), y)
7887   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7888     SDValue X = N0.getOperand(0);
7889     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7890     AddToWorklist(NotX.getNode());
7891     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7892   }
7893
       // xor of a one-use shl/srl by a constant with a constant (or splat)
       // operand: see the fold described inside.
7894   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7895     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7896     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7897     unsigned BitWidth = VT.getScalarSizeInBits();
7898     if (XorC && ShiftC) {
7899       // Don't crash on an oversized shift. We can not guarantee that a bogus
7900       // shift has been simplified to undef.
7901       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7902       if (ShiftAmt < BitWidth) {
             // Ones is an all-ones value shifted the same way as N0, i.e. the
             // set of bits the shift can leave non-zero.
7903         APInt Ones = APInt::getAllOnesValue(BitWidth);
7904         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7905         if (XorC->getAPIntValue() == Ones) {
7906           // If the xor constant is a shifted -1, do a 'not' before the shift:
7907           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7908           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7909           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7910           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7911         }
7912       }
7913     }
7914   }
7915
7916   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7917   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
         // Tentatively pick which operand is the add (A) and which is the sra
         // (S); the opcode checks below reject the guess if it is wrong.
7918     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7919     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7920     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7921       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7922       SDValue S0 = S.getOperand(0);
7923       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7924         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7925           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7926             return DAG.getNode(ISD::ABS, DL, VT, S0);
7927     }
7928   }
7929
7930   // fold (xor x, x) -> 0
7931   if (N0 == N1)
7932     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7933
7934   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7935   // Here is a concrete example of this equivalence:
7936   // i16   x ==  14
7937   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7938   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7939   //
7940   // =>
7941   //
7942   // i16     ~1      == 0b1111111111111110
7943   // i16 rol(~1, 14) == 0b1011111111111111
7944   //
7945   // Some additional tips to help conceptualize this transform:
7946   // - Try to see the operation as placing a single zero in a value of all ones.
7947   // - There exists no value for x which would allow the result to contain zero.
7948   // - Values of x larger than the bitwidth are undefined and do not require a
7949   //   consistent result.
7950   // - Pushing the zero left requires shifting one bits in from the right.
7951   // A rotate left of ~1 is a nice way of achieving the desired result.
       // NOTE(review): ~1 is an int here and reaches getConstant as a 64-bit
       // value; confirm the resulting constant still has every bit above bit 0
       // set when VT is wider than 64 bits.
7952   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7953       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7954     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7955                        N0.getOperand(1));
7956   }
7957
7958   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7959   if (N0Opcode == N1.getOpcode())
7960     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7961       return V;
7962
7963   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7964   if (SDValue MM = unfoldMaskedMerge(N))
7965     return MM;
7966
7967   // Simplify the expression using non-local knowledge.
7968   if (SimplifyDemandedBits(SDValue(N, 0)))
7969     return SDValue(N, 0);
7970
7971   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7972     return Combined;
7973
       // No fold applied.
7974   return SDValue();
7975 }
7976
7977 /// Given a shift-by-constant whose operand is a one-use bitwise logic op
7978 /// (and/or/xor), where one operand of that logic op is itself a one-use
7979 /// shift-by-constant with the same opcode, rewrite it as two independent
7980 /// shifts feeding the logic op. This is a throughput improvement.
7981 ///   shift (logic (shift X, C0), Y), C1
7982 ///     -> logic (shift X, C0+C1), (shift Y, C1)
7983 /// Returns an empty SDValue when the pattern does not match.
7984 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7985   // The shifted value has to be a bitwise logic op with a single use.
7986   SDValue Logic = Shift->getOperand(0);
7987   if (!Logic.hasOneUse())
7988     return SDValue();
7989
7990   unsigned LogicOpc = Logic.getOpcode();
7991   bool IsBitwiseLogic = LogicOpc == ISD::AND || LogicOpc == ISD::OR ||
7992                         LogicOpc == ISD::XOR;
7993   if (!IsBitwiseLogic)
7994     return SDValue();
7995
7996   // The outer shift amount must be a constant (or splat); this is asserted.
7997   unsigned ShiftOpc = Shift->getOpcode();
7998   SDValue OuterAmt = Shift->getOperand(1);
7999   ConstantSDNode *OuterAmtC = isConstOrConstSplat(OuterAmt);
8000   assert(OuterAmtC && "Expected a shift with constant operand");
8001   const APInt &OuterVal = OuterAmtC->getAPIntValue();
8002
8003   // Recognize a one-use inner shift by constant using the same opcode as the
8004   // outer shift. On success, Src holds the value being shifted and Amt points
8005   // at the inner shift amount.
8006   auto matchInnerShift = [&](SDValue V, SDValue &Src, const APInt *&Amt) {
8007     if (V.getOpcode() != ShiftOpc || !V.hasOneUse())
8008       return false;
8009
8010     ConstantSDNode *InnerAmtC = isConstOrConstSplat(V.getOperand(1));
8011     if (!InnerAmtC)
8012       return false;
8013
8014     Src = V.getOperand(0);
8015     Amt = &InnerAmtC->getAPIntValue();
8016
8017     // The type of a shift amount need not match the shifted type, so require
8018     // both constants to have the same width. The fold is also invalid when
8019     // the summed shift amounts reach the bit width of the shifted value.
8020     return Amt->getBitWidth() == OuterVal.getBitWidth() &&
8021            (*Amt + OuterVal).ult(V.getScalarValueSizeInBits());
8022   };
8023
8024   // The logic op commutes, so look for the inner shift on either side.
8025   SDValue X, Y;
8026   const APInt *InnerVal;
8027   if (matchInnerShift(Logic.getOperand(0), X, InnerVal))
8028     Y = Logic.getOperand(1);
8029   else if (matchInnerShift(Logic.getOperand(1), X, InnerVal))
8030     Y = Logic.getOperand(0);
8031   else
8032     return SDValue();
8033
8034   SDLoc DL(Shift);
8035   EVT VT = Shift->getValueType(0);
8036   EVT AmtVT = OuterAmt.getValueType();
8037   SDValue SumAmt = DAG.getConstant(*InnerVal + OuterVal, DL, AmtVT);
8038   SDValue ShiftedX = DAG.getNode(ShiftOpc, DL, VT, X, SumAmt);
8039   SDValue ShiftedY = DAG.getNode(ShiftOpc, DL, VT, Y, OuterAmt);
8040   return DAG.getNode(LogicOpc, DL, VT, ShiftedX, ShiftedY);
8041 }
8042
8043 /// Handle transforms common to the three shifts, when the shift amount is a
8044 /// constant.
8045 /// We are looking for: (shift being one of shl/sra/srl)
8046 ///   shift (binop X, C0), C1
8047 /// And want to transform into:
8048 ///   binop (shift X, C1), (shift C0, C1)
     /// N is the shift node; its second operand must be a constant or constant
     /// splat (asserted). Returns the rewritten value, or an empty SDValue when
     /// no transform is performed.
8049 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8050   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8051
8052   // Do not turn a 'not' into a regular xor.
8053   if (isBitwiseNot(N->getOperand(0)))
8054     return SDValue();
8055
8056   // The inner binop must be one-use, since we want to replace it.
       // The target also gets a veto through isDesirableToCommuteWithShift.
8057   SDValue LHS = N->getOperand(0);
8058   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8059     return SDValue();
8060
8061   // TODO: This is limited to early combining because it may reveal regressions
8062   //       otherwise. But since we just checked a target hook to see if this is
8063   //       desirable, that should have filtered out cases where this interferes
8064   //       with some other pattern matching.
8065   if (!LegalTypes)
8066     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8067       return R;
8068
8069   // We want to pull some binops through shifts, so that we have (and (shift))
8070   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8071   // thing happens with address calculations, so it's important to canonicalize
8072   // it.
       // Only or/xor/and are accepted for every shift kind; add is accepted for
       // shl only.
8073   switch (LHS.getOpcode()) {
8074   default:
8075     return SDValue();
8076   case ISD::OR:
8077   case ISD::XOR:
8078   case ISD::AND:
8079     break;
8080   case ISD::ADD:
8081     if (N->getOpcode() != ISD::SHL)
8082       return SDValue(); // only shl(add) not sr[al](add).
8083     break;
8084   }
8085
8086   // We require the RHS of the binop to be a constant and not opaque as well.
8087   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8088   if (!BinOpCst)
8089     return SDValue();
8090
8091   // FIXME: disable this unless the input to the binop is a shift by a constant
8092   // or is copy/select. Enable this in other cases when figure out it's exactly
8093   // profitable.
8094   SDValue BinOpLHSVal = LHS.getOperand(0);
8095   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8096                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8097                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8098                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8099   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8100                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8101
8102   if (!IsShiftByConstant && !IsCopyOrSelect)
8103     return SDValue();
8104
       // For the copy/select case the fold is only done when the shift itself
       // has more than one use.
8105   if (IsCopyOrSelect && N->hasOneUse())
8106     return SDValue();
8107
8108   // Fold the constants, shifting the binop RHS by the shift amount.
       // getNode is expected to constant-fold this shift of two constants; the
       // assert below checks that it did.
8109   SDLoc DL(N);
8110   EVT VT = N->getValueType(0);
8111   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8112                                N->getOperand(1));
8113   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8114
       // Shift the non-constant binop operand, then re-apply the binop.
8115   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8116                                  N->getOperand(1));
8117   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8118 }
8119
8120 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8121   assert(N->getOpcode() == ISD::TRUNCATE);
8122   assert(N->getOperand(0).getOpcode() == ISD::AND);
8123
8124   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8125   // Requires a one-use truncate and 'and', a target that reports TruncVT as
8126   // desirable for AND, and a non-opaque constant (or constant vector) mask.
8127   SDValue And = N->getOperand(0);
8128   EVT NarrowVT = N->getValueType(0);
8129   if (!N->hasOneUse() || !And.hasOneUse() ||
8130       !TLI.isTypeDesirableForOp(ISD::AND, NarrowVT))
8131     return SDValue();
8132   SDValue Mask = And.getOperand(1);
8133   if (!isConstantOrConstantVector(Mask, /* NoOpaques */ true))
8134     return SDValue();
8135   SDLoc DL(N);
8136   SDValue Src = DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, And.getOperand(0));
8137   SDValue NarrowMask = DAG.getNode(ISD::TRUNCATE, DL, NarrowVT, Mask);
8138   AddToWorklist(Src.getNode());
8139   AddToWorklist(NarrowMask.getNode());
8140   return DAG.getNode(ISD::AND, DL, NarrowVT, Src, NarrowMask);
8141 }
8142
8143 SDValue DAGCombiner::visitRotate(SDNode *N) {
       // Combine a rotate node (operand 0 is the rotated value, operand 1 the
       // rotate amount). Returns the replacement value, SDValue(N, 0) when N was
       // simplified in place, or an empty SDValue when no fold applies.
8144   SDLoc dl(N);
8145   SDValue N0 = N->getOperand(0);
8146   SDValue N1 = N->getOperand(1);
8147   EVT VT = N->getValueType(0);
8148   unsigned Bitsize = VT.getScalarSizeInBits();
8149
8150   // fold (rot x, 0) -> x
8151   if (isNullOrNullSplat(N1))
8152     return N0;
8153
8154   // fold (rot x, c) -> x iff (c % BitSize) == 0
       // For a power-of-two bit width this holds when the low log2(Bitsize) bits
       // of the amount are known to be zero.
8155   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8156     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8157     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8158       return N0;
8159   }
8160
8161   // fold (rot x, c) -> (rot x, c % BitSize)
       // The predicate accepts every constant element and only records whether
       // any of them is >= Bitsize.
8162   bool OutOfRange = false;
8163   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8164     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8165     return true;
8166   };
8167   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8168     EVT AmtVT = N1.getValueType();
8169     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8170     if (SDValue Amt =
8171             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8172       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8173   }
8174
8175   // rot i16 X, 8 --> bswap X
8176   auto *RotAmtC = isConstOrConstSplat(N1);
8177   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8178       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8179     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8180
8181   // Simplify the operands using demanded-bits information.
8182   if (SimplifyDemandedBits(SDValue(N, 0)))
8183     return SDValue(N, 0);
8184
8185   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8186   if (N1.getOpcode() == ISD::TRUNCATE &&
8187       N1.getOperand(0).getOpcode() == ISD::AND) {
8188     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8189       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8190   }
8191
8192   unsigned NextOp = N0.getOpcode();
       // fold (rot* (rot* x, c2), c1)
       //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) [+ bitsize]) % bitsize)
       // Rotates in the same direction add their amounts; opposite directions
       // subtract the inner amount from the outer one.
8194   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8195     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8196     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8197     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8198       EVT ShiftVT = C1->getValueType(0);
8199       bool SameSide = (N->getOpcode() == NextOp);
8200       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
           SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
           // Reduce both amounts modulo the bit width before combining them, so
           // the sum/difference is computed on in-range values instead of on
           // arbitrary constants that may wrap in ShiftVT.
           SDValue Norm1 =
               DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, {N1, BitsizeC});
           SDValue Norm2 = DAG.FoldConstantArithmetic(
               ISD::UREM, dl, ShiftVT, {N0.getOperand(1), BitsizeC});
           SDValue CombinedShift;
           if (Norm1 && Norm2)
             CombinedShift = DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT,
                                                        {Norm1, Norm2});
           // A difference may be negative; bias it by the bit width so that the
           // unsigned remainder below yields the equivalent non-negative rotate
           // amount. (A signed remainder of a negative value is only a correct
           // rotate amount when the bit width is a power of two.)
           if (CombinedShift && !SameSide)
             CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
                                                        {CombinedShift, BitsizeC});
           // Every fold result is checked; constant folding may fail.
           if (CombinedShift)
             if (SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
                     ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}))
               return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                                  CombinedShiftNorm);
8209     }
8210   }
8211   return SDValue();
8212 }
8213
8214 SDValue DAGCombiner::visitSHL(SDNode *N) {
       // Try to simplify a shift-left node. Operand 0 (N0) is the value being
       // shifted and operand 1 (N1) is the shift amount. Returns the replacement
       // value when one of the folds below applies, SDValue(N, 0) when
       // SimplifyDemandedBits reports a change, or an empty SDValue when nothing
       // matched. The folds are tried in the order written; the first match wins.
8215   SDValue N0 = N->getOperand(0);
8216   SDValue N1 = N->getOperand(1);
       // Let DAG.simplifyShift look at the operands first and use its result if
       // it produced one.
8217   if (SDValue V = DAG.simplifyShift(N0, N1))
8218     return V;
8219
       // VT is the type of the shifted value, ShiftVT the type of the shift
       // amount; OpSizeInBits is the per-element width of VT.
8220   EVT VT = N0.getValueType();
8221   EVT ShiftVT = N1.getValueType();
8222   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8223
8224   // fold vector ops
8225   if (VT.isVector()) {
8226     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8227       return FoldedVOp;
8228
8229     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8230     // If setcc produces all-one true value then:
8231     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8232     if (N1CV && N1CV->isConstant()) {
8233       if (N0.getOpcode() == ISD::AND) {
8234         SDValue N00 = N0->getOperand(0);
8235         SDValue N01 = N0->getOperand(1);
8236         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8237
             // Requires a constant build-vector mask, a SETCC on the other side,
             // and a target whose boolean contents for the compared type are
             // zero-or-negative-one.
8238         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8239             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8240                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8241           if (SDValue C =
8242                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8243             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8244         }
8245       }
8246     }
8247   }
8248
       // N1C is null-checked before every use below; presumably it is non-null
       // only for a constant or constant-splat shift amount (see
       // isConstOrConstSplat).
8249   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8250
8251   // fold (shl c1, c2) -> c1<<c2
8252   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8253     return C;
8254
8255   if (SDValue NewSel = foldBinOpIntoSelect(N))
8256     return NewSel;
8257
8258   // if (shl x, c) is known to be zero, return 0
8259   if (DAG.MaskedValueIsZero(SDValue(N, 0),
8260                             APInt::getAllOnesValue(OpSizeInBits)))
8261     return DAG.getConstant(0, SDLoc(N), VT);
8262
8263   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8264   if (N1.getOpcode() == ISD::TRUNCATE &&
8265       N1.getOperand(0).getOpcode() == ISD::AND) {
8266     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8267       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8268   }
8269
8270   if (SimplifyDemandedBits(SDValue(N, 0)))
8271     return SDValue(N, 0);
8272
8273   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8274   if (N0.getOpcode() == ISD::SHL) {
         // True when the two amounts add up to at least the element width.
         // zeroExtendToMatch is asked for one extra "overflow bit", presumably so
         // that c1 + c2 cannot wrap.
8275     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8276                                           ConstantSDNode *RHS) {
8277       APInt c1 = LHS->getAPIntValue();
8278       APInt c2 = RHS->getAPIntValue();
8279       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8280       return (c1 + c2).uge(OpSizeInBits);
8281     };
8282     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8283       return DAG.getConstant(0, SDLoc(N), VT);
8284
         // True when the two amounts add up to less than the element width.
8285     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8286                                        ConstantSDNode *RHS) {
8287       APInt c1 = LHS->getAPIntValue();
8288       APInt c2 = RHS->getAPIntValue();
8289       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8290       return (c1 + c2).ult(OpSizeInBits);
8291     };
8292     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8293       SDLoc DL(N);
8294       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8295       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8296     }
8297   }
8298
8299   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8300   // For this to be valid, the second form must not preserve any of the bits
8301   // that are shifted out by the inner shift in the first form.  This means
8302   // the outer shift size must be >= the number of bits added by the ext.
8303   // As a corollary, we don't care what kind of ext it is.
8304   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8305        N0.getOpcode() == ISD::ANY_EXTEND ||
8306        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8307       N0.getOperand(0).getOpcode() == ISD::SHL) {
8308     SDValue N0Op0 = N0.getOperand(0);
8309     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8310     EVT InnerVT = N0Op0.getValueType();
8311     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8312
         // In both predicates below LHS/c1 is the inner amount and RHS/c2 is the
         // outer amount (see the argument order of matchBinaryPredicate), and
         // OpSizeInBits - InnerBitwidth is the number of bits added by the ext.
8313     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8314                                                          ConstantSDNode *RHS) {
8315       APInt c1 = LHS->getAPIntValue();
8316       APInt c2 = RHS->getAPIntValue();
8317       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8318       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8319              (c1 + c2).uge(OpSizeInBits);
8320     };
8321     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8322                                   /*AllowUndefs*/ false,
8323                                   /*AllowTypeMismatch*/ true))
8324       return DAG.getConstant(0, SDLoc(N), VT);
8325
8326     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8327                                                       ConstantSDNode *RHS) {
8328       APInt c1 = LHS->getAPIntValue();
8329       APInt c2 = RHS->getAPIntValue();
8330       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8331       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8332              (c1 + c2).ult(OpSizeInBits);
8333     };
8334     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8335                                   /*AllowUndefs*/ false,
8336                                   /*AllowTypeMismatch*/ true)) {
8337       SDLoc DL(N);
           // Re-create the same kind of extend directly on x, and bring the inner
           // amount to ShiftVT (the types were allowed to differ) before adding.
8338       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8339       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8340       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8341       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8342     }
8343   }
8344
8345   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8346   // Only fold this if the inner zext has no other uses to avoid increasing
8347   // the total number of instructions.
8348   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8349       N0.getOperand(0).getOpcode() == ISD::SRL) {
8350     SDValue N0Op0 = N0.getOperand(0);
8351     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8352
         // Both amounts must be equal and smaller than the width of VT.
8353     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8354       APInt c1 = LHS->getAPIntValue();
8355       APInt c2 = RHS->getAPIntValue();
8356       zeroExtendToMatch(c1, c2);
8357       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8358     };
8359     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8360                                   /*AllowUndefs*/ false,
8361                                   /*AllowTypeMismatch*/ true)) {
8362       SDLoc DL(N);
           // The new shl is built in the narrow type, so the amount is converted
           // to the inner shift's amount type.
8363       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8364       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8365       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8366       AddToWorklist(NewSHL.getNode());
8367       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8368     }
8369   }
8370
8371   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8372   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
8373   // TODO - support non-uniform vector shift amounts.
       // Only applies when the inner right shift carries the exact flag.
8374   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8375       N0->getFlags().hasExact()) {
8376     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8377       uint64_t C1 = N0C1->getZExtValue();
8378       uint64_t C2 = N1C->getZExtValue();
8379       SDLoc DL(N);
8380       if (C1 <= C2)
8381         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8382                            DAG.getConstant(C2 - C1, DL, ShiftVT));
           // C1 > C2: keep the original kind of right shift with the residual
           // amount.
8383       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8384                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8385     }
8386   }
8387
8388   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
8389   //                               (and (srl x, (sub c1, c2)), MASK)
8390   // Only fold this if the inner shift has no other uses -- if it does, folding
8391   // this will increase the total number of instructions.
8392   // TODO - drop hasOneUse requirement if c1 == c2?
8393   // TODO - support non-uniform vector shift amounts.
8394   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8395       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8396     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8397       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8398         uint64_t c1 = N0C1->getZExtValue();
8399         uint64_t c2 = N1C->getZExtValue();
             // Start with the top (OpSizeInBits - c1) bits set, then move the
             // mask by the net shift (left when c2 > c1, right otherwise) so it
             // lines up with the single combined shift.
8400         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8401         SDValue Shift;
8402         if (c2 > c1) {
8403           Mask <<= c2 - c1;
8404           SDLoc DL(N);
8405           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8406                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8407         } else {
8408           Mask.lshrInPlace(c1 - c2);
8409           SDLoc DL(N);
8410           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8411                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8412         }
             // Note: the AND and its mask constant take their location from N0,
             // while the shift above takes it from N.
8413         SDLoc DL(N0);
8414         return DAG.getNode(ISD::AND, DL, VT, Shift,
8415                            DAG.getConstant(Mask, DL, VT));
8416       }
8417     }
8418   }
8419
8420   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8421   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8422       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8423     SDLoc DL(N);
8424     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8425     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8426     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8427   }
8428
8429   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8430   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8431   // Variant of version done on multiply, except mul by a power of 2 is turned
8432   // into a shift.
       // The target gets a veto through isDesirableToCommuteWithShift.
8433   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8434       N0.getNode()->hasOneUse() &&
8435       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8436       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8437       TLI.isDesirableToCommuteWithShift(N, Level)) {
8438     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8439     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8440     AddToWorklist(Shl0.getNode());
8441     AddToWorklist(Shl1.getNode());
8442     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8443   }
8444
8445   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8446   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8447       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8448       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8449     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
         // Only use the new multiply if c1 << c2 came back as a constant.
8450     if (isConstantOrConstantVector(Shl))
8451       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8452   }
8453
       // Generic shift-by-constant folds; skipped for opaque constants.
8454   if (N1C && !N1C->isOpaque())
8455     if (SDValue NewSHL = visitShiftByConstant(N))
8456       return NewSHL;
8457
8458   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8459   if (N0.getOpcode() == ISD::VSCALE)
8460     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8461       const APInt &C0 = N0.getConstantOperandAPInt(0);
8462       const APInt &C1 = NC1->getAPIntValue();
8463       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8464     }
8465
8466   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8467   APInt ShlVal;
8468   if (N0.getOpcode() == ISD::STEP_VECTOR)
8469     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8470       const APInt &C0 = N0.getConstantOperandAPInt(0);
           // Only fold when the shift amount is smaller than the step's bit
           // width.
8471       if (ShlVal.ult(C0.getBitWidth())) {
8472         APInt NewStep = C0 << ShlVal;
8473         return DAG.getStepVector(SDLoc(N), VT, NewStep);
8474       }
8475     }
8476
8477   return SDValue();
8478 }
8479
8480 // Transform a right shift of a multiply into a multiply-high.
8481 // Examples:
8482 // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
8483 // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
8484 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8485                                   const TargetLowering &TLI) {
8486   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8487          "SRL or SRA node is required here!");
8488
8489   // Check the shift amount. Proceed with the transformation if the shift
8490   // amount is constant.
8491   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8492   if (!ShiftAmtSrc)
8493     return SDValue();
8494
8495   SDLoc DL(N);
8496
8497   // The operation feeding into the shift must be a multiply.
8498   SDValue ShiftOperand = N->getOperand(0);
8499   if (ShiftOperand.getOpcode() != ISD::MUL)
8500     return SDValue();
8501
8502   // Both operands must be equivalent extend nodes.
8503   SDValue LeftOp = ShiftOperand.getOperand(0);
8504   SDValue RightOp = ShiftOperand.getOperand(1);
8505   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8506   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8507
8508   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8509     return SDValue();
8510
8511   EVT WideVT1 = LeftOp.getValueType();
8512   EVT WideVT2 = RightOp.getValueType();
8513   (void)WideVT2;
8514   // Proceed with the transformation if the wide types match.
8515   assert((WideVT1 == WideVT2) &&
8516          "Cannot have a multiply node with two different operand types.");
8517
8518   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8519   // Check that the two extend nodes are the same type.
8520   if (NarrowVT !=  RightOp.getOperand(0).getValueType())
8521     return SDValue();
8522
8523   // Proceed with the transformation if the wide type is twice as large
8524   // as the narrow type.
8525   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8526   if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8527     return SDValue();
8528
8529   // Check the shift amount with the narrow type size.
8530   // Proceed with the transformation if the shift amount is the width
8531   // of the narrow type.
8532   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8533   if (ShiftAmt != NarrowVTSize)
8534     return SDValue();
8535
8536   // If the operation feeding into the MUL is a sign extend (sext),
8537   // we use mulhs. Othewise, zero extends (zext) use mulhu.
8538   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8539
8540   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8541   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8542     return SDValue();
8543
8544   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8545                                RightOp.getOperand(0));
8546   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8547                                      : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8548 }
8549
8550 SDValue DAGCombiner::visitSRA(SDNode *N) {
       // Try to simplify an arithmetic shift-right node. Operand 0 (N0) is the
       // value being shifted and operand 1 (N1) is the shift amount. Returns the
       // replacement value when one of the folds below applies, SDValue(N, 0)
       // when SimplifyDemandedBits reports a change, or an empty SDValue when
       // nothing matched. The folds are tried in the order written.
8551   SDValue N0 = N->getOperand(0);
8552   SDValue N1 = N->getOperand(1);
       // Let DAG.simplifyShift look at the operands first and use its result if
       // it produced one.
8553   if (SDValue V = DAG.simplifyShift(N0, N1))
8554     return V;
8555
8556   EVT VT = N0.getValueType();
8557   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8558
8559   // Arithmetic shifting an all-sign-bit value is a no-op.
8560   // fold (sra 0, x) -> 0
8561   // fold (sra -1, x) -> -1
8562   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8563     return N0;
8564
8565   // fold vector ops
8566   if (VT.isVector())
8567     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8568       return FoldedVOp;
8569
       // N1C is null-checked before every use below; presumably it is non-null
       // only for a constant or constant-splat shift amount (see
       // isConstOrConstSplat).
8570   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8571
8572   // fold (sra c1, c2) -> c1 >>s c2
8573   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8574     return C;
8575
8576   if (SDValue NewSel = foldBinOpIntoSelect(N))
8577     return NewSel;
8578
8579   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8580   // sext_inreg.
8581   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
         // LowBits is the number of low bits of x that survive the shift pair;
         // ExtVT is the integer type (or vector of it) of that width.
8582     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8583     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8584     if (VT.isVector())
8585       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8586                                VT.getVectorElementCount());
         // When LegalOperations is set, only form the node if the target reports
         // SIGN_EXTEND_INREG as Legal for ExtVT.
8587     if (!LegalOperations ||
8588         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8589         TargetLowering::Legal)
8590       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8591                          N0.getOperand(0), DAG.getValueType(ExtVT));
8592     // Even if we can't convert to sext_inreg, we might be able to remove
8593     // this shift pair if the input is already sign extended.
8594     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8595       return N0.getOperand(0);
8596   }
8597
8598   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8599   // clamp (add c1, c2) to max shift.
8600   if (N0.getOpcode() == ISD::SRA) {
8601     SDLoc DL(N);
8602     EVT ShiftVT = N1.getValueType();
8603     EVT ShiftSVT = ShiftVT.getScalarType();
8604     SmallVector<SDValue, 16> ShiftValues;
8605
         // Accepts every constant pair. As a side effect it appends the combined
         // amount, clamped to OpSizeInBits - 1, to ShiftValues.
8606     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8607       APInt c1 = LHS->getAPIntValue();
8608       APInt c2 = RHS->getAPIntValue();
8609       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8610       APInt Sum = c1 + c2;
8611       unsigned ShiftSum =
8612           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8613       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8614       return true;
8615     };
8616     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
           // Vectors get a build vector of the collected amounts; scalars use the
           // single collected amount.
8617       SDValue ShiftValue;
8618       if (VT.isVector())
8619         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8620       else
8621         ShiftValue = ShiftValues[0];
8622       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8623     }
8624   }
8625
8626   // fold (sra (shl X, m), (sub result_size, n))
8627   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8628   // result_size - n != m.
8629   // If truncate is free for the target sext(shl) is likely to result in better
8630   // code.
       // NOTE(review): the code below builds an ISD::SRL of X by
       // (outer amount - inner amount), not a shl as the pattern above is
       // written -- confirm which one the comment should say.
8631   if (N0.getOpcode() == ISD::SHL && N1C) {
8632     // Get the two constants of the shifts: N01C is the inner (shl) amount and
         // N1C is the outer (sra) amount.
8633     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8634     if (N01C) {
8635       LLVMContext &Ctx = *DAG.getContext();
8636       // Determine what the truncate's result bitsize and type would be.
8637       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8638
8639       if (VT.isVector())
8640         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8641
8642       // Determine the residual right-shift amount.
8643       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8644
8645       // If the shift is not a no-op (in which case this should be just a sign
8646       // extend already), the truncated to type is legal, sign_extend is legal
8647       // on that type, and the truncate to that type is both legal and free,
8648       // perform the transform.
8649       if ((ShiftAmt > 0) &&
8650           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8651           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8652           TLI.isTruncateFree(VT, TruncVT)) {
8653         SDLoc DL(N);
8654         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8655             getShiftAmountTy(N0.getOperand(0).getValueType()));
8656         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8657                                     N0.getOperand(0), Amt);
8658         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8659                                     Shift);
8660         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8661                            N->getValueType(0), Trunc);
8662       }
8663     }
8664   }
8665
8666   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8667   //   sra (add (shl X, N1C), AddC), N1C -->
8668   //   sext (add (trunc X to (width - N1C)), AddC')
       // Both the add and the inner shl must have a single use.
8669   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8670       N0.getOperand(0).getOpcode() == ISD::SHL &&
8671       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8672     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8673       SDValue Shl = N0.getOperand(0);
8674       // Determine what the truncate's type would be and ask the target if that
8675       // is a free operation.
8676       LLVMContext &Ctx = *DAG.getContext();
8677       unsigned ShiftAmt = N1C->getZExtValue();
8678       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8679       if (VT.isVector())
8680         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8681
8682       // TODO: The simple type check probably belongs in the default hook
8683       //       implementation and/or target-specific overrides (because
8684       //       non-simple types likely require masking when legalized), but that
8685       //       restriction may conflict with other transforms.
8686       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8687           TLI.isTruncateFree(VT, TruncVT)) {
8688         SDLoc DL(N);
8689         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
             // AddC' is AddC logically shifted right by the shift amount and
             // truncated to the narrow element width.
8690         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8691                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8692         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8693         return DAG.getSExtOrTrunc(Add, DL, VT);
8694       }
8695     }
8696   }
8697
8698   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8699   if (N1.getOpcode() == ISD::TRUNCATE &&
8700       N1.getOperand(0).getOpcode() == ISD::AND) {
8701     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8702       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8703   }
8704
8705   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8706   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8707   //      if c1 is equal to the number of bits the trunc removes
8708   // TODO - support non-uniform vector shift amounts.
8709   if (N0.getOpcode() == ISD::TRUNCATE &&
8710       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8711        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8712       N0.getOperand(0).hasOneUse() &&
8713       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8714     SDValue N0Op0 = N0.getOperand(0);
8715     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8716       EVT LargeVT = N0Op0.getValueType();
           // TruncBits is the number of bits the truncate removes per element.
8717       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8718       if (LargeShift->getAPIntValue() == TruncBits) {
8719         SDLoc DL(N);
8720         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8721                                       getShiftAmountTy(LargeVT));
8722         SDValue SRA =
8723             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8724         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8725       }
8726     }
8727   }
8728
8729   // Simplify, based on bits shifted out of the LHS.
8730   if (SimplifyDemandedBits(SDValue(N, 0)))
8731     return SDValue(N, 0);
8732
8733   // If the sign bit is known to be zero, switch this to a SRL.
8734   if (DAG.SignBitIsZero(N0))
8735     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8736
       // Generic shift-by-constant folds; skipped for opaque constants.
8737   if (N1C && !N1C->isOpaque())
8738     if (SDValue NewSRA = visitShiftByConstant(N))
8739       return NewSRA;
8740
8741   // Try to transform this shift into a multiply-high if
8742   // it matches the appropriate pattern detected in combineShiftToMULH.
8743   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8744     return MULH;
8745
8746   return SDValue();
8747 }
8748
8749 SDValue DAGCombiner::visitSRL(SDNode *N) {
8750   SDValue N0 = N->getOperand(0);
8751   SDValue N1 = N->getOperand(1);
8752   if (SDValue V = DAG.simplifyShift(N0, N1))
8753     return V;
8754
8755   EVT VT = N0.getValueType();
8756   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8757
8758   // fold vector ops
8759   if (VT.isVector())
8760     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8761       return FoldedVOp;
8762
8763   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8764
8765   // fold (srl c1, c2) -> c1 >>u c2
8766   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8767     return C;
8768
8769   if (SDValue NewSel = foldBinOpIntoSelect(N))
8770     return NewSel;
8771
8772   // if (srl x, c) is known to be zero, return 0
8773   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8774                                    APInt::getAllOnesValue(OpSizeInBits)))
8775     return DAG.getConstant(0, SDLoc(N), VT);
8776
8777   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8778   if (N0.getOpcode() == ISD::SRL) {
8779     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8780                                           ConstantSDNode *RHS) {
8781       APInt c1 = LHS->getAPIntValue();
8782       APInt c2 = RHS->getAPIntValue();
8783       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8784       return (c1 + c2).uge(OpSizeInBits);
8785     };
8786     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8787       return DAG.getConstant(0, SDLoc(N), VT);
8788
8789     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8790                                        ConstantSDNode *RHS) {
8791       APInt c1 = LHS->getAPIntValue();
8792       APInt c2 = RHS->getAPIntValue();
8793       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8794       return (c1 + c2).ult(OpSizeInBits);
8795     };
8796     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8797       SDLoc DL(N);
8798       EVT ShiftVT = N1.getValueType();
8799       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8800       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8801     }
8802   }
8803
8804   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8805       N0.getOperand(0).getOpcode() == ISD::SRL) {
8806     SDValue InnerShift = N0.getOperand(0);
8807     // TODO - support non-uniform vector shift amounts.
8808     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8809       uint64_t c1 = N001C->getZExtValue();
8810       uint64_t c2 = N1C->getZExtValue();
8811       EVT InnerShiftVT = InnerShift.getValueType();
8812       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8813       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8814       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8815       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8816       if (c1 + OpSizeInBits == InnerShiftSize) {
8817         SDLoc DL(N);
8818         if (c1 + c2 >= InnerShiftSize)
8819           return DAG.getConstant(0, DL, VT);
8820         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8821         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8822                                        InnerShift.getOperand(0), NewShiftAmt);
8823         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8824       }
8825       // In the more general case, we can clear the high bits after the shift:
8826       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8827       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8828           c1 + c2 < InnerShiftSize) {
8829         SDLoc DL(N);
8830         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8831         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8832                                        InnerShift.getOperand(0), NewShiftAmt);
8833         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8834                                                             OpSizeInBits - c2),
8835                                        DL, InnerShiftVT);
8836         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8837         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8838       }
8839     }
8840   }
8841
8842   // fold (srl (shl x, c), c) -> (and x, cst2)
8843   // TODO - (srl (shl x, c1), c2).
8844   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8845       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8846     SDLoc DL(N);
8847     SDValue Mask =
8848         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8849     AddToWorklist(Mask.getNode());
8850     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8851   }
8852
8853   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8854   // TODO - support non-uniform vector shift amounts.
8855   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8856     // Shifting in all undef bits?
8857     EVT SmallVT = N0.getOperand(0).getValueType();
8858     unsigned BitSize = SmallVT.getScalarSizeInBits();
8859     if (N1C->getAPIntValue().uge(BitSize))
8860       return DAG.getUNDEF(VT);
8861
8862     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8863       uint64_t ShiftAmt = N1C->getZExtValue();
8864       SDLoc DL0(N0);
8865       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8866                                        N0.getOperand(0),
8867                           DAG.getConstant(ShiftAmt, DL0,
8868                                           getShiftAmountTy(SmallVT)));
8869       AddToWorklist(SmallShift.getNode());
8870       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8871       SDLoc DL(N);
8872       return DAG.getNode(ISD::AND, DL, VT,
8873                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8874                          DAG.getConstant(Mask, DL, VT));
8875     }
8876   }
8877
8878   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8879   // bit, which is unmodified by sra.
8880   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8881     if (N0.getOpcode() == ISD::SRA)
8882       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8883   }
8884
8885   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
8886   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8887       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8888     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8889
8890     // If any of the input bits are KnownOne, then the input couldn't be all
8891     // zeros, thus the result of the srl will always be zero.
8892     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8893
8894     // If all of the bits input the to ctlz node are known to be zero, then
8895     // the result of the ctlz is "32" and the result of the shift is one.
8896     APInt UnknownBits = ~Known.Zero;
8897     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8898
8899     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8900     if (UnknownBits.isPowerOf2()) {
8901       // Okay, we know that only that the single bit specified by UnknownBits
8902       // could be set on input to the CTLZ node. If this bit is set, the SRL
8903       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8904       // to an SRL/XOR pair, which is likely to simplify more.
8905       unsigned ShAmt = UnknownBits.countTrailingZeros();
8906       SDValue Op = N0.getOperand(0);
8907
8908       if (ShAmt) {
8909         SDLoc DL(N0);
8910         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8911                   DAG.getConstant(ShAmt, DL,
8912                                   getShiftAmountTy(Op.getValueType())));
8913         AddToWorklist(Op.getNode());
8914       }
8915
8916       SDLoc DL(N);
8917       return DAG.getNode(ISD::XOR, DL, VT,
8918                          Op, DAG.getConstant(1, DL, VT));
8919     }
8920   }
8921
8922   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8923   if (N1.getOpcode() == ISD::TRUNCATE &&
8924       N1.getOperand(0).getOpcode() == ISD::AND) {
8925     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8926       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8927   }
8928
8929   // fold operands of srl based on knowledge that the low bits are not
8930   // demanded.
8931   if (SimplifyDemandedBits(SDValue(N, 0)))
8932     return SDValue(N, 0);
8933
8934   if (N1C && !N1C->isOpaque())
8935     if (SDValue NewSRL = visitShiftByConstant(N))
8936       return NewSRL;
8937
8938   // Attempt to convert a srl of a load into a narrower zero-extending load.
8939   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8940     return NarrowLoad;
8941
8942   // Here is a common situation. We want to optimize:
8943   //
8944   //   %a = ...
8945   //   %b = and i32 %a, 2
8946   //   %c = srl i32 %b, 1
8947   //   brcond i32 %c ...
8948   //
8949   // into
8950   //
8951   //   %a = ...
8952   //   %b = and %a, 2
8953   //   %c = setcc eq %b, 0
8954   //   brcond %c ...
8955   //
8956   // However when after the source operand of SRL is optimized into AND, the SRL
8957   // itself may not be optimized further. Look for it and add the BRCOND into
8958   // the worklist.
8959   if (N->hasOneUse()) {
8960     SDNode *Use = *N->use_begin();
8961     if (Use->getOpcode() == ISD::BRCOND)
8962       AddToWorklist(Use);
8963     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8964       // Also look pass the truncate.
8965       Use = *Use->use_begin();
8966       if (Use->getOpcode() == ISD::BRCOND)
8967         AddToWorklist(Use);
8968     }
8969   }
8970
8971   // Try to transform this shift into a multiply-high if
8972   // it matches the appropriate pattern detected in combineShiftToMULH.
8973   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8974     return MULH;
8975
8976   return SDValue();
8977 }
8978
8979 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8980   EVT VT = N->getValueType(0);
8981   SDValue N0 = N->getOperand(0);
8982   SDValue N1 = N->getOperand(1);
8983   SDValue N2 = N->getOperand(2);
8984   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8985   unsigned BitWidth = VT.getScalarSizeInBits();
8986
8987   // fold (fshl N0, N1, 0) -> N0
8988   // fold (fshr N0, N1, 0) -> N1
8989   if (isPowerOf2_32(BitWidth))
8990     if (DAG.MaskedValueIsZero(
8991             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8992       return IsFSHL ? N0 : N1;
8993
8994   auto IsUndefOrZero = [](SDValue V) {
8995     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8996   };
8997
8998   // TODO - support non-uniform vector shift amounts.
8999   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9000     EVT ShAmtTy = N2.getValueType();
9001
9002     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9003     if (Cst->getAPIntValue().uge(BitWidth)) {
9004       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9005       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9006                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9007     }
9008
9009     unsigned ShAmt = Cst->getZExtValue();
9010     if (ShAmt == 0)
9011       return IsFSHL ? N0 : N1;
9012
9013     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9014     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9015     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9016     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9017     if (IsUndefOrZero(N0))
9018       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9019                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9020                                          SDLoc(N), ShAmtTy));
9021     if (IsUndefOrZero(N1))
9022       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9023                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9024                                          SDLoc(N), ShAmtTy));
9025
9026     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9027     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9028     // TODO - bigendian support once we have test coverage.
9029     // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
9030     // TODO - permit LHS EXTLOAD if extensions are shifted out.
9031     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9032         !DAG.getDataLayout().isBigEndian()) {
9033       auto *LHS = dyn_cast<LoadSDNode>(N0);
9034       auto *RHS = dyn_cast<LoadSDNode>(N1);
9035       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9036           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9037           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9038           ISD::isNON_EXTLoad(LHS)) {
9039         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9040           SDLoc DL(RHS);
9041           uint64_t PtrOff =
9042               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9043           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9044           bool Fast = false;
9045           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9046                                      RHS->getAddressSpace(), NewAlign,
9047                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9048               Fast) {
9049             SDValue NewPtr = DAG.getMemBasePlusOffset(
9050                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9051             AddToWorklist(NewPtr.getNode());
9052             SDValue Load = DAG.getLoad(
9053                 VT, DL, RHS->getChain(), NewPtr,
9054                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9055                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9056             // Replace the old load's chain with the new load's chain.
9057             WorklistRemover DeadNodes(*this);
9058             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9059             return Load;
9060           }
9061         }
9062       }
9063     }
9064   }
9065
9066   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9067   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9068   // iff We know the shift amount is in range.
9069   // TODO: when is it worth doing SUB(BW, N2) as well?
9070   if (isPowerOf2_32(BitWidth)) {
9071     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9072     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9073       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9074     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9075       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9076   }
9077
9078   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9079   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9080   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
9081   // is legal as well we might be better off avoiding non-constant (BW - N2).
9082   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9083   if (N0 == N1 && hasOperation(RotOpc, VT))
9084     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9085
9086   // Simplify, based on bits shifted out of N0/N1.
9087   if (SimplifyDemandedBits(SDValue(N, 0)))
9088     return SDValue(N, 0);
9089
9090   return SDValue();
9091 }
9092
9093 // Given a ABS node, detect the following pattern:
9094 // (ABS (SUB (EXTEND a), (EXTEND b))).
9095 // Generates UABD/SABD instruction.
9096 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9097                                const TargetLowering &TLI) {
9098   SDValue AbsOp1 = N->getOperand(0);
9099   SDValue Op0, Op1;
9100
9101   if (AbsOp1.getOpcode() != ISD::SUB)
9102     return SDValue();
9103
9104   Op0 = AbsOp1.getOperand(0);
9105   Op1 = AbsOp1.getOperand(1);
9106
9107   unsigned Opc0 = Op0.getOpcode();
9108   // Check if the operands of the sub are (zero|sign)-extended.
9109   if (Opc0 != Op1.getOpcode() ||
9110       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9111     return SDValue();
9112
9113   EVT VT1 = Op0.getOperand(0).getValueType();
9114   EVT VT2 = Op1.getOperand(0).getValueType();
9115   // Check if the operands are of same type and valid size.
9116   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9117   if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9118     return SDValue();
9119
9120   Op0 = Op0.getOperand(0);
9121   Op1 = Op1.getOperand(0);
9122   SDValue ABD =
9123       DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9124   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9125 }
9126
9127 SDValue DAGCombiner::visitABS(SDNode *N) {
9128   SDValue N0 = N->getOperand(0);
9129   EVT VT = N->getValueType(0);
9130
9131   // fold (abs c1) -> c2
9132   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9133     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9134   // fold (abs (abs x)) -> (abs x)
9135   if (N0.getOpcode() == ISD::ABS)
9136     return N0;
9137   // fold (abs x) -> x iff not-negative
9138   if (DAG.SignBitIsZero(N0))
9139     return N0;
9140
9141   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9142     return ABD;
9143
9144   return SDValue();
9145 }
9146
9147 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9148   SDValue N0 = N->getOperand(0);
9149   EVT VT = N->getValueType(0);
9150
9151   // fold (bswap c1) -> c2
9152   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9153     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9154   // fold (bswap (bswap x)) -> x
9155   if (N0.getOpcode() == ISD::BSWAP)
9156     return N0->getOperand(0);
9157   return SDValue();
9158 }
9159
9160 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9161   SDValue N0 = N->getOperand(0);
9162   EVT VT = N->getValueType(0);
9163
9164   // fold (bitreverse c1) -> c2
9165   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9166     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9167   // fold (bitreverse (bitreverse x)) -> x
9168   if (N0.getOpcode() == ISD::BITREVERSE)
9169     return N0.getOperand(0);
9170   return SDValue();
9171 }
9172
9173 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9174   SDValue N0 = N->getOperand(0);
9175   EVT VT = N->getValueType(0);
9176
9177   // fold (ctlz c1) -> c2
9178   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9179     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9180
9181   // If the value is known never to be zero, switch to the undef version.
9182   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9183     if (DAG.isKnownNeverZero(N0))
9184       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9185   }
9186
9187   return SDValue();
9188 }
9189
9190 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9191   SDValue N0 = N->getOperand(0);
9192   EVT VT = N->getValueType(0);
9193
9194   // fold (ctlz_zero_undef c1) -> c2
9195   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9196     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9197   return SDValue();
9198 }
9199
9200 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9201   SDValue N0 = N->getOperand(0);
9202   EVT VT = N->getValueType(0);
9203
9204   // fold (cttz c1) -> c2
9205   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9206     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9207
9208   // If the value is known never to be zero, switch to the undef version.
9209   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9210     if (DAG.isKnownNeverZero(N0))
9211       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9212   }
9213
9214   return SDValue();
9215 }
9216
9217 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9218   SDValue N0 = N->getOperand(0);
9219   EVT VT = N->getValueType(0);
9220
9221   // fold (cttz_zero_undef c1) -> c2
9222   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9223     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9224   return SDValue();
9225 }
9226
9227 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9228   SDValue N0 = N->getOperand(0);
9229   EVT VT = N->getValueType(0);
9230
9231   // fold (ctpop c1) -> c2
9232   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9233     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9234   return SDValue();
9235 }
9236
9237 // FIXME: This should be checking for no signed zeros on individual operands, as
9238 // well as no nans.
9239 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9240                                          SDValue RHS,
9241                                          const TargetLowering &TLI) {
9242   const TargetOptions &Options = DAG.getTarget().Options;
9243   EVT VT = LHS.getValueType();
9244
9245   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9246          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9247          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9248 }
9249
9250 /// Generate Min/Max node
9251 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9252                                    SDValue RHS, SDValue True, SDValue False,
9253                                    ISD::CondCode CC, const TargetLowering &TLI,
9254                                    SelectionDAG &DAG) {
9255   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9256     return SDValue();
9257
9258   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9259   switch (CC) {
9260   case ISD::SETOLT:
9261   case ISD::SETOLE:
9262   case ISD::SETLT:
9263   case ISD::SETLE:
9264   case ISD::SETULT:
9265   case ISD::SETULE: {
9266     // Since it's known never nan to get here already, either fminnum or
9267     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
9268     // expanded in terms of it.
9269     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9270     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9271       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9272
9273     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9274     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9275       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9276     return SDValue();
9277   }
9278   case ISD::SETOGT:
9279   case ISD::SETOGE:
9280   case ISD::SETGT:
9281   case ISD::SETGE:
9282   case ISD::SETUGT:
9283   case ISD::SETUGE: {
9284     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9285     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9286       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9287
9288     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9289     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9290       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9291     return SDValue();
9292   }
9293   default:
9294     return SDValue();
9295   }
9296 }
9297
9298 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9299 /// the condition operand sign-bit across the value width and use it as a mask.
9300 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9301   SDValue Cond = N->getOperand(0);
9302   SDValue C1 = N->getOperand(1);
9303   SDValue C2 = N->getOperand(2);
9304   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9305     return SDValue();
9306
9307   EVT VT = N->getValueType(0);
9308   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9309       VT != Cond.getOperand(0).getValueType())
9310     return SDValue();
9311
9312   // The inverted-condition + commuted-select variants of these patterns are
9313   // canonicalized to these forms in IR.
9314   SDValue X = Cond.getOperand(0);
9315   SDValue CondC = Cond.getOperand(1);
9316   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9317   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9318       isAllOnesOrAllOnesSplat(C2)) {
9319     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9320     SDLoc DL(N);
9321     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9322     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9323     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9324   }
9325   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9326     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9327     SDLoc DL(N);
9328     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9329     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9330     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9331   }
9332   return SDValue();
9333 }
9334
9335 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9336   SDValue Cond = N->getOperand(0);
9337   SDValue N1 = N->getOperand(1);
9338   SDValue N2 = N->getOperand(2);
9339   EVT VT = N->getValueType(0);
9340   EVT CondVT = Cond.getValueType();
9341   SDLoc DL(N);
9342
9343   if (!VT.isInteger())
9344     return SDValue();
9345
9346   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9347   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9348   if (!C1 || !C2)
9349     return SDValue();
9350
9351   // Only do this before legalization to avoid conflicting with target-specific
9352   // transforms in the other direction (create a select from a zext/sext). There
9353   // is also a target-independent combine here in DAGCombiner in the other
9354   // direction for (select Cond, -1, 0) when the condition is not i1.
9355   if (CondVT == MVT::i1 && !LegalOperations) {
9356     if (C1->isNullValue() && C2->isOne()) {
9357       // select Cond, 0, 1 --> zext (!Cond)
9358       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9359       if (VT != MVT::i1)
9360         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9361       return NotCond;
9362     }
9363     if (C1->isNullValue() && C2->isAllOnesValue()) {
9364       // select Cond, 0, -1 --> sext (!Cond)
9365       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9366       if (VT != MVT::i1)
9367         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9368       return NotCond;
9369     }
9370     if (C1->isOne() && C2->isNullValue()) {
9371       // select Cond, 1, 0 --> zext (Cond)
9372       if (VT != MVT::i1)
9373         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9374       return Cond;
9375     }
9376     if (C1->isAllOnesValue() && C2->isNullValue()) {
9377       // select Cond, -1, 0 --> sext (Cond)
9378       if (VT != MVT::i1)
9379         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9380       return Cond;
9381     }
9382
9383     // Use a target hook because some targets may prefer to transform in the
9384     // other direction.
9385     if (TLI.convertSelectOfConstantsToMath(VT)) {
9386       // For any constants that differ by 1, we can transform the select into an
9387       // extend and add.
9388       const APInt &C1Val = C1->getAPIntValue();
9389       const APInt &C2Val = C2->getAPIntValue();
9390       if (C1Val - 1 == C2Val) {
9391         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9392         if (VT != MVT::i1)
9393           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9394         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9395       }
9396       if (C1Val + 1 == C2Val) {
9397         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9398         if (VT != MVT::i1)
9399           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9400         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9401       }
9402
9403       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9404       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9405         if (VT != MVT::i1)
9406           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9407         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9408         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9409       }
9410
9411       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9412         return V;
9413     }
9414
9415     return SDValue();
9416   }
9417
9418   // fold (select Cond, 0, 1) -> (xor Cond, 1)
9419   // We can't do this reliably if integer based booleans have different contents
9420   // to floating point based booleans. This is because we can't tell whether we
9421   // have an integer-based boolean or a floating-point-based boolean unless we
9422   // can find the SETCC that produced it and inspect its operands. This is
9423   // fairly easy if C is the SETCC node, but it can potentially be
9424   // undiscoverable (or not reasonably discoverable). For example, it could be
9425   // in another basic block or it could require searching a complicated
9426   // expression.
9427   if (CondVT.isInteger() &&
9428       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9429           TargetLowering::ZeroOrOneBooleanContent &&
9430       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9431           TargetLowering::ZeroOrOneBooleanContent &&
9432       C1->isNullValue() && C2->isOne()) {
9433     SDValue NotCond =
9434         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9435     if (VT.bitsEq(CondVT))
9436       return NotCond;
9437     return DAG.getZExtOrTrunc(NotCond, DL, VT);
9438   }
9439
9440   return SDValue();
9441 }
9442
9443 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9444   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9445          "Expected a (v)select");
9446   SDValue Cond = N->getOperand(0);
9447   SDValue T = N->getOperand(1), F = N->getOperand(2);
9448   EVT VT = N->getValueType(0);
9449   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9450     return SDValue();
9451
9452   // select Cond, Cond, F --> or Cond, F
9453   // select Cond, 1, F    --> or Cond, F
9454   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9455     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9456
9457   // select Cond, T, Cond --> and Cond, T
9458   // select Cond, T, 0    --> and Cond, T
9459   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9460     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9461
9462   // select Cond, T, 1 --> or (not Cond), T
9463   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9464     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9465     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9466   }
9467
9468   // select Cond, 0, F --> and (not Cond), F
9469   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9470     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9471     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9472   }
9473
9474   return SDValue();
9475 }
9476
9477 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9478   SDValue N0 = N->getOperand(0);
9479   SDValue N1 = N->getOperand(1);
9480   SDValue N2 = N->getOperand(2);
9481   EVT VT = N->getValueType(0);
9482   EVT VT0 = N0.getValueType();
9483   SDLoc DL(N);
9484   SDNodeFlags Flags = N->getFlags();
9485
9486   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9487     return V;
9488
9489   if (SDValue V = foldSelectOfConstants(N))
9490     return V;
9491
9492   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9493     return V;
9494
9495   // If we can fold this based on the true/false value, do so.
9496   if (SimplifySelectOps(N, N1, N2))
9497     return SDValue(N, 0); // Don't revisit N.
9498
9499   if (VT0 == MVT::i1) {
9500     // The code in this block deals with the following 2 equivalences:
9501     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9502     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9503     // The target can specify its preferred form with the
9504     // shouldNormalizeToSelectSequence() callback. However we always transform
9505     // to the right anyway if we find the inner select exists in the DAG anyway
9506     // and we always transform to the left side if we know that we can further
9507     // optimize the combination of the conditions.
9508     bool normalizeToSequence =
9509         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9510     // select (and Cond0, Cond1), X, Y
9511     //   -> select Cond0, (select Cond1, X, Y), Y
9512     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9513       SDValue Cond0 = N0->getOperand(0);
9514       SDValue Cond1 = N0->getOperand(1);
9515       SDValue InnerSelect =
9516           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9517       if (normalizeToSequence || !InnerSelect.use_empty())
9518         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9519                            InnerSelect, N2, Flags);
9520       // Cleanup on failure.
9521       if (InnerSelect.use_empty())
9522         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9523     }
9524     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9525     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9526       SDValue Cond0 = N0->getOperand(0);
9527       SDValue Cond1 = N0->getOperand(1);
9528       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9529                                         Cond1, N1, N2, Flags);
9530       if (normalizeToSequence || !InnerSelect.use_empty())
9531         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9532                            InnerSelect, Flags);
9533       // Cleanup on failure.
9534       if (InnerSelect.use_empty())
9535         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9536     }
9537
9538     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9539     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9540       SDValue N1_0 = N1->getOperand(0);
9541       SDValue N1_1 = N1->getOperand(1);
9542       SDValue N1_2 = N1->getOperand(2);
9543       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9544         // Create the actual and node if we can generate good code for it.
9545         if (!normalizeToSequence) {
9546           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9547           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9548                              N2, Flags);
9549         }
9550         // Otherwise see if we can optimize the "and" to a better pattern.
9551         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9552           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9553                              N2, Flags);
9554         }
9555       }
9556     }
9557     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9558     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9559       SDValue N2_0 = N2->getOperand(0);
9560       SDValue N2_1 = N2->getOperand(1);
9561       SDValue N2_2 = N2->getOperand(2);
9562       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9563         // Create the actual or node if we can generate good code for it.
9564         if (!normalizeToSequence) {
9565           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9566           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9567                              N2_2, Flags);
9568         }
9569         // Otherwise see if we can optimize to a better pattern.
9570         if (SDValue Combined = visitORLike(N0, N2_0, N))
9571           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9572                              N2_2, Flags);
9573       }
9574     }
9575   }
9576
9577   // select (not Cond), N1, N2 -> select Cond, N2, N1
9578   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9579     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9580     SelectOp->setFlags(Flags);
9581     return SelectOp;
9582   }
9583
9584   // Fold selects based on a setcc into other things, such as min/max/abs.
9585   if (N0.getOpcode() == ISD::SETCC) {
9586     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9587     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9588
9589     // select (fcmp lt x, y), x, y -> fminnum x, y
9590     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9591     //
9592     // This is OK if we don't care what happens if either operand is a NaN.
9593     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9594       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9595                                                 CC, TLI, DAG))
9596         return FMinMax;
9597
9598     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9599     // This is conservatively limited to pre-legal-operations to give targets
9600     // a chance to reverse the transform if they want to do that. Also, it is
9601     // unlikely that the pattern would be formed late, so it's probably not
9602     // worth going through the other checks.
9603     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9604         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9605         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9606       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9607       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9608       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9609         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9610         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9611         //
9612         // The IR equivalent of this transform would have this form:
9613         //   %a = add %x, C
9614         //   %c = icmp ugt %x, ~C
9615         //   %r = select %c, -1, %a
9616         //   =>
9617         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9618         //   %u0 = extractvalue %u, 0
9619         //   %u1 = extractvalue %u, 1
9620         //   %r = select %u1, -1, %u0
9621         SDVTList VTs = DAG.getVTList(VT, VT0);
9622         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9623         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9624       }
9625     }
9626
9627     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9628         (!LegalOperations &&
9629          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9630       // Any flags available in a select/setcc fold will be on the setcc as they
9631       // migrated from fcmp
9632       Flags = N0.getNode()->getFlags();
9633       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9634                                        N2, N0.getOperand(2));
9635       SelectNode->setFlags(Flags);
9636       return SelectNode;
9637     }
9638
9639     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9640       return NewSel;
9641   }
9642
9643   if (!VT.isVector())
9644     if (SDValue BinOp = foldSelectOfBinops(N))
9645       return BinOp;
9646
9647   return SDValue();
9648 }
9649
9650 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9651 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9652 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9653   SDLoc DL(N);
9654   SDValue Cond = N->getOperand(0);
9655   SDValue LHS = N->getOperand(1);
9656   SDValue RHS = N->getOperand(2);
9657   EVT VT = N->getValueType(0);
9658   int NumElems = VT.getVectorNumElements();
9659   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9660          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9661          Cond.getOpcode() == ISD::BUILD_VECTOR);
9662
9663   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9664   // binary ones here.
9665   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9666     return SDValue();
9667
9668   // We're sure we have an even number of elements due to the
9669   // concat_vectors we have as arguments to vselect.
9670   // Skip BV elements until we find one that's not an UNDEF
9671   // After we find an UNDEF element, keep looping until we get to half the
9672   // length of the BV and see if all the non-undef nodes are the same.
9673   ConstantSDNode *BottomHalf = nullptr;
9674   for (int i = 0; i < NumElems / 2; ++i) {
9675     if (Cond->getOperand(i)->isUndef())
9676       continue;
9677
9678     if (BottomHalf == nullptr)
9679       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9680     else if (Cond->getOperand(i).getNode() != BottomHalf)
9681       return SDValue();
9682   }
9683
9684   // Do the same for the second half of the BuildVector
9685   ConstantSDNode *TopHalf = nullptr;
9686   for (int i = NumElems / 2; i < NumElems; ++i) {
9687     if (Cond->getOperand(i)->isUndef())
9688       continue;
9689
9690     if (TopHalf == nullptr)
9691       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9692     else if (Cond->getOperand(i).getNode() != TopHalf)
9693       return SDValue();
9694   }
9695
9696   assert(TopHalf && BottomHalf &&
9697          "One half of the selector was all UNDEFs and the other was all the "
9698          "same value. This should have been addressed before this function.");
9699   return DAG.getNode(
9700       ISD::CONCAT_VECTORS, DL, VT,
9701       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9702       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9703 }
9704
9705 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9706   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9707     return false;
9708
9709   // For now we check only the LHS of the add.
9710   SDValue LHS = Index.getOperand(0);
9711   SDValue SplatVal = DAG.getSplatValue(LHS);
9712   if (!SplatVal)
9713     return false;
9714
9715   BasePtr = SplatVal;
9716   Index = Index.getOperand(1);
9717   return true;
9718 }
9719
9720 // Fold sext/zext of index into index type.
9721 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9722                      bool Scaled, SelectionDAG &DAG) {
9723   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9724
9725   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9726     SDValue Op = Index.getOperand(0);
9727     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9728     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9729       Index = Op;
9730       return true;
9731     }
9732   }
9733
9734   if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9735     SDValue Op = Index.getOperand(0);
9736     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9737     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9738       Index = Op;
9739       return true;
9740     }
9741   }
9742
9743   return false;
9744 }
9745
9746 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9747   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9748   SDValue Mask = MSC->getMask();
9749   SDValue Chain = MSC->getChain();
9750   SDValue Index = MSC->getIndex();
9751   SDValue Scale = MSC->getScale();
9752   SDValue StoreVal = MSC->getValue();
9753   SDValue BasePtr = MSC->getBasePtr();
9754   SDLoc DL(N);
9755
9756   // Zap scatters with a zero mask.
9757   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9758     return Chain;
9759
9760   if (refineUniformBase(BasePtr, Index, DAG)) {
9761     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9762     return DAG.getMaskedScatter(
9763         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9764         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9765   }
9766
9767   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9768     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9769     return DAG.getMaskedScatter(
9770         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
9771         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9772   }
9773
9774   return SDValue();
9775 }
9776
9777 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9778   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9779   SDValue Mask = MST->getMask();
9780   SDValue Chain = MST->getChain();
9781   SDLoc DL(N);
9782
9783   // Zap masked stores with a zero mask.
9784   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9785     return Chain;
9786
9787   // If this is a masked load with an all ones mask, we can use a unmasked load.
9788   // FIXME: Can we do this for indexed, compressing, or truncating stores?
9789   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9790       MST->isUnindexed() && !MST->isCompressingStore() &&
9791       !MST->isTruncatingStore())
9792     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9793                         MST->getBasePtr(), MST->getMemOperand());
9794
9795   // Try transforming N to an indexed store.
9796   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9797     return SDValue(N, 0);
9798
9799   return SDValue();
9800 }
9801
9802 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9803   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9804   SDValue Mask = MGT->getMask();
9805   SDValue Chain = MGT->getChain();
9806   SDValue Index = MGT->getIndex();
9807   SDValue Scale = MGT->getScale();
9808   SDValue PassThru = MGT->getPassThru();
9809   SDValue BasePtr = MGT->getBasePtr();
9810   SDLoc DL(N);
9811
9812   // Zap gathers with a zero mask.
9813   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9814     return CombineTo(N, PassThru, MGT->getChain());
9815
9816   if (refineUniformBase(BasePtr, Index, DAG)) {
9817     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9818     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9819                                MGT->getMemoryVT(), DL, Ops,
9820                                MGT->getMemOperand(), MGT->getIndexType(),
9821                                MGT->getExtensionType());
9822   }
9823
9824   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9825     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9826     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9827                                MGT->getMemoryVT(), DL, Ops,
9828                                MGT->getMemOperand(), MGT->getIndexType(),
9829                                MGT->getExtensionType());
9830   }
9831
9832   return SDValue();
9833 }
9834
9835 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9836   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9837   SDValue Mask = MLD->getMask();
9838   SDLoc DL(N);
9839
9840   // Zap masked loads with a zero mask.
9841   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9842     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9843
9844   // If this is a masked load with an all ones mask, we can use a unmasked load.
9845   // FIXME: Can we do this for indexed, expanding, or extending loads?
9846   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
9847       MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9848       MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9849     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9850                                 MLD->getBasePtr(), MLD->getMemOperand());
9851     return CombineTo(N, NewLd, NewLd.getValue(1));
9852   }
9853
9854   // Try transforming N to an indexed load.
9855   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9856     return SDValue(N, 0);
9857
9858   return SDValue();
9859 }
9860
9861 /// A vector select of 2 constant vectors can be simplified to math/logic to
9862 /// avoid a variable select instruction and possibly avoid constant loads.
9863 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9864   SDValue Cond = N->getOperand(0);
9865   SDValue N1 = N->getOperand(1);
9866   SDValue N2 = N->getOperand(2);
9867   EVT VT = N->getValueType(0);
9868   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9869       !TLI.convertSelectOfConstantsToMath(VT) ||
9870       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9871       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9872     return SDValue();
9873
9874   // Check if we can use the condition value to increment/decrement a single
9875   // constant value. This simplifies a select to an add and removes a constant
9876   // load/materialization from the general case.
9877   bool AllAddOne = true;
9878   bool AllSubOne = true;
9879   unsigned Elts = VT.getVectorNumElements();
9880   for (unsigned i = 0; i != Elts; ++i) {
9881     SDValue N1Elt = N1.getOperand(i);
9882     SDValue N2Elt = N2.getOperand(i);
9883     if (N1Elt.isUndef() || N2Elt.isUndef())
9884       continue;
9885     if (N1Elt.getValueType() != N2Elt.getValueType())
9886       continue;
9887
9888     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9889     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9890     if (C1 != C2 + 1)
9891       AllAddOne = false;
9892     if (C1 != C2 - 1)
9893       AllSubOne = false;
9894   }
9895
9896   // Further simplifications for the extra-special cases where the constants are
9897   // all 0 or all -1 should be implemented as folds of these patterns.
9898   SDLoc DL(N);
9899   if (AllAddOne || AllSubOne) {
9900     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9901     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9902     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9903     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9904     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9905   }
9906
9907   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9908   APInt Pow2C;
9909   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9910       isNullOrNullSplat(N2)) {
9911     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9912     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9913     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9914   }
9915
9916   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9917     return V;
9918
9919   // The general case for select-of-constants:
9920   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9921   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9922   // leave that to a machine-specific pass.
9923   return SDValue();
9924 }
9925
9926 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9927   SDValue N0 = N->getOperand(0);
9928   SDValue N1 = N->getOperand(1);
9929   SDValue N2 = N->getOperand(2);
9930   EVT VT = N->getValueType(0);
9931   SDLoc DL(N);
9932
9933   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9934     return V;
9935
9936   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9937     return V;
9938
9939   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9940   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9941     return DAG.getSelect(DL, VT, F, N2, N1);
9942
9943   // Canonicalize integer abs.
9944   // vselect (setg[te] X,  0),  X, -X ->
9945   // vselect (setgt    X, -1),  X, -X ->
9946   // vselect (setl[te] X,  0), -X,  X ->
9947   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9948   if (N0.getOpcode() == ISD::SETCC) {
9949     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9950     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9951     bool isAbs = false;
9952     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9953
9954     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9955          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9956         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9957       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9958     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9959              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9960       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9961
9962     if (isAbs) {
9963       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9964         return DAG.getNode(ISD::ABS, DL, VT, LHS);
9965
9966       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9967                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
9968                                                   DL, getShiftAmountTy(VT)));
9969       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9970       AddToWorklist(Shift.getNode());
9971       AddToWorklist(Add.getNode());
9972       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9973     }
9974
9975     // vselect x, y (fcmp lt x, y) -> fminnum x, y
9976     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9977     //
9978     // This is OK if we don't care about what happens if either operand is a
9979     // NaN.
9980     //
9981     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9982       if (SDValue FMinMax =
9983               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9984         return FMinMax;
9985     }
9986
9987     // If this select has a condition (setcc) with narrower operands than the
9988     // select, try to widen the compare to match the select width.
9989     // TODO: This should be extended to handle any constant.
9990     // TODO: This could be extended to handle non-loading patterns, but that
9991     //       requires thorough testing to avoid regressions.
9992     if (isNullOrNullSplat(RHS)) {
9993       EVT NarrowVT = LHS.getValueType();
9994       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9995       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9996       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9997       unsigned WideWidth = WideVT.getScalarSizeInBits();
9998       bool IsSigned = isSignedIntSetCC(CC);
9999       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10000       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10001           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10002           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10003           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10004         // Both compare operands can be widened for free. The LHS can use an
10005         // extended load, and the RHS is a constant:
10006         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10007         //   vselect (setcc extload(X), C'), N1, N2
10008         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10009         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10010         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10011         EVT WideSetCCVT = getSetCCResultType(WideVT);
10012         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10013         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10014       }
10015     }
10016
10017     // Match VSELECTs into add with unsigned saturation.
10018     if (hasOperation(ISD::UADDSAT, VT)) {
10019       // Check if one of the arms of the VSELECT is vector with all bits set.
10020       // If it's on the left side invert the predicate to simplify logic below.
10021       SDValue Other;
10022       ISD::CondCode SatCC = CC;
10023       if (ISD::isBuildVectorAllOnes(N1.getNode())) {
10024         Other = N2;
10025         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10026       } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
10027         Other = N1;
10028       }
10029
10030       if (Other && Other.getOpcode() == ISD::ADD) {
10031         SDValue CondLHS = LHS, CondRHS = RHS;
10032         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10033
10034         // Canonicalize condition operands.
10035         if (SatCC == ISD::SETUGE) {
10036           std::swap(CondLHS, CondRHS);
10037           SatCC = ISD::SETULE;
10038         }
10039
10040         // We can test against either of the addition operands.
10041         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10042         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10043         if (SatCC == ISD::SETULE && Other == CondRHS &&
10044             (OpLHS == CondLHS || OpRHS == CondLHS))
10045           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10046
10047         if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
10048             CondLHS == OpLHS) {
10049           // If the RHS is a constant we have to reverse the const
10050           // canonicalization.
10051           // x >= ~C ? x+C : ~0 --> uaddsat x, C
10052           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10053             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10054           };
10055           if (SatCC == ISD::SETULE &&
10056               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10057             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10058         }
10059       }
10060     }
10061
10062     // Match VSELECTs into sub with unsigned saturation.
10063     if (hasOperation(ISD::USUBSAT, VT)) {
10064       // Check if one of the arms of the VSELECT is a zero vector. If it's on
10065       // the left side invert the predicate to simplify logic below.
10066       SDValue Other;
10067       ISD::CondCode SatCC = CC;
10068       if (ISD::isBuildVectorAllZeros(N1.getNode())) {
10069         Other = N2;
10070         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10071       } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
10072         Other = N1;
10073       }
10074
10075       if (Other && Other.getNumOperands() == 2) {
10076         SDValue CondRHS = RHS;
10077         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10078
10079         if (Other.getOpcode() == ISD::SUB &&
10080             LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10081             OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10082           // Look for a general sub with unsigned saturation first.
10083           // zext(x) >= y ? x - trunc(y) : 0
10084           // --> usubsat(x,trunc(umin(y,SatLimit)))
10085           // zext(x) >  y ? x - trunc(y) : 0
10086           // --> usubsat(x,trunc(umin(y,SatLimit)))
10087           if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10088             return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10089                                        DL);
10090         }
10091
10092         if (OpLHS == LHS) {
10093           // Look for a general sub with unsigned saturation first.
10094           // x >= y ? x-y : 0 --> usubsat x, y
10095           // x >  y ? x-y : 0 --> usubsat x, y
10096           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10097               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10098             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10099
10100           if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
10101             if (isa<BuildVectorSDNode>(CondRHS)) {
10102               // If the RHS is a constant we have to reverse the const
10103               // canonicalization.
10104               // x > C-1 ? x+-C : 0 --> usubsat x, C
10105               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10106                 return (!Op && !Cond) ||
10107                        (Op && Cond &&
10108                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10109               };
10110               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10111                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10112                                             /*AllowUndefs*/ true)) {
10113                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10114                                     DAG.getConstant(0, DL, VT), OpRHS);
10115                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10116               }
10117
10118               // Another special case: If C was a sign bit, the sub has been
10119               // canonicalized into a xor.
10120               // FIXME: Would it be better to use computeKnownBits to determine
10121               //        whether it's safe to decanonicalize the xor?
10122               // x s< 0 ? x^C : 0 --> usubsat x, C
10123               if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
10124                 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10125                     ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
10126                     OpRHSConst->getAPIntValue().isSignMask()) {
10127                   // Note that we have to rebuild the RHS constant here to
10128                   // ensure we don't rely on particular values of undef lanes.
10129                   OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
10130                   return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10131                 }
10132               }
10133             }
10134           }
10135         }
10136       }
10137     }
10138   }
10139
10140   if (SimplifySelectOps(N, N1, N2))
10141     return SDValue(N, 0);  // Don't revisit N.
10142
10143   // Fold (vselect all_ones, N1, N2) -> N1
10144   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10145     return N1;
10146   // Fold (vselect all_zeros, N1, N2) -> N2
10147   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10148     return N2;
10149
10150   // The ConvertSelectToConcatVector function is assuming both the above
10151   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
10152   // and addressed.
10153   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10154       N2.getOpcode() == ISD::CONCAT_VECTORS &&
10155       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10156     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10157       return CV;
10158   }
10159
10160   if (SDValue V = foldVSelectOfConstants(N))
10161     return V;
10162
10163   return SDValue();
10164 }
10165
10166 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10167   SDValue N0 = N->getOperand(0);
10168   SDValue N1 = N->getOperand(1);
10169   SDValue N2 = N->getOperand(2);
10170   SDValue N3 = N->getOperand(3);
10171   SDValue N4 = N->getOperand(4);
10172   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10173
10174   // fold select_cc lhs, rhs, x, x, cc -> x
10175   if (N2 == N3)
10176     return N2;
10177
10178   // Determine if the condition we're dealing with is constant
10179   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10180                                   CC, SDLoc(N), false)) {
10181     AddToWorklist(SCC.getNode());
10182
10183     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10184       if (!SCCC->isNullValue())
10185         return N2;    // cond always true -> true val
10186       else
10187         return N3;    // cond always false -> false val
10188     } else if (SCC->isUndef()) {
10189       // When the condition is UNDEF, just return the first operand. This is
10190       // coherent the DAG creation, no setcc node is created in this case
10191       return N2;
10192     } else if (SCC.getOpcode() == ISD::SETCC) {
10193       // Fold to a simpler select_cc
10194       SDValue SelectOp = DAG.getNode(
10195           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10196           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10197       SelectOp->setFlags(SCC->getFlags());
10198       return SelectOp;
10199     }
10200   }
10201
10202   // If we can fold this based on the true/false value, do so.
10203   if (SimplifySelectOps(N, N2, N3))
10204     return SDValue(N, 0);  // Don't revisit N.
10205
10206   // fold select_cc into other things, such as min/max/abs
10207   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10208 }
10209
10210 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10211   // setcc is very commonly used as an argument to brcond. This pattern
10212   // also lend itself to numerous combines and, as a result, it is desired
10213   // we keep the argument to a brcond as a setcc as much as possible.
10214   bool PreferSetCC =
10215       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10216
10217   SDValue Combined = SimplifySetCC(
10218       N->getValueType(0), N->getOperand(0), N->getOperand(1),
10219       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
10220
10221   if (!Combined)
10222     return SDValue();
10223
10224   // If we prefer to have a setcc, and we don't, we'll try our best to
10225   // recreate one using rebuildSetCC.
10226   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10227     SDValue NewSetCC = rebuildSetCC(Combined);
10228
10229     // We don't have anything interesting to combine to.
10230     if (NewSetCC.getNode() == N)
10231       return SDValue();
10232
10233     if (NewSetCC)
10234       return NewSetCC;
10235   }
10236
10237   return Combined;
10238 }
10239
10240 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10241   SDValue LHS = N->getOperand(0);
10242   SDValue RHS = N->getOperand(1);
10243   SDValue Carry = N->getOperand(2);
10244   SDValue Cond = N->getOperand(3);
10245
10246   // If Carry is false, fold to a regular SETCC.
10247   if (isNullConstant(Carry))
10248     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
10249
10250   return SDValue();
10251 }
10252
10253 /// Check if N satisfies:
10254 ///   N is used once.
10255 ///   N is a Load.
10256 ///   The load is compatible with ExtOpcode. It means
10257 ///     If load has explicit zero/sign extension, ExpOpcode must have the same
10258 ///     extension.
10259 ///     Otherwise returns true.
10260 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10261   if (!N.hasOneUse())
10262     return false;
10263
10264   if (!isa<LoadSDNode>(N))
10265     return false;
10266
10267   LoadSDNode *Load = cast<LoadSDNode>(N);
10268   ISD::LoadExtType LoadExt = Load->getExtensionType();
10269   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10270     return true;
10271
10272   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10273   // extension.
10274   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10275       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10276     return false;
10277
10278   return true;
10279 }
10280
10281 /// Fold
10282 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10283 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10284 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10285 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10286 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10287 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10288                                          SelectionDAG &DAG) {
10289   unsigned Opcode = N->getOpcode();
10290   SDValue N0 = N->getOperand(0);
10291   EVT VT = N->getValueType(0);
10292   SDLoc DL(N);
10293
10294   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10295           Opcode == ISD::ANY_EXTEND) &&
10296          "Expected EXTEND dag node in input!");
10297
10298   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10299       !N0.hasOneUse())
10300     return SDValue();
10301
10302   SDValue Op1 = N0->getOperand(1);
10303   SDValue Op2 = N0->getOperand(2);
10304   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10305     return SDValue();
10306
10307   auto ExtLoadOpcode = ISD::EXTLOAD;
10308   if (Opcode == ISD::SIGN_EXTEND)
10309     ExtLoadOpcode = ISD::SEXTLOAD;
10310   else if (Opcode == ISD::ZERO_EXTEND)
10311     ExtLoadOpcode = ISD::ZEXTLOAD;
10312
10313   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10314   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10315   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10316       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10317     return SDValue();
10318
10319   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10320   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10321   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10322 }
10323
10324 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10325 /// a build_vector of constants.
10326 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10327 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10328 /// Vector extends are not folded if operations are legal; this is to
10329 /// avoid introducing illegal build_vector dag nodes.
10330 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10331                                          SelectionDAG &DAG, bool LegalTypes) {
10332   unsigned Opcode = N->getOpcode();
10333   SDValue N0 = N->getOperand(0);
10334   EVT VT = N->getValueType(0);
10335   SDLoc DL(N);
10336
10337   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10338          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10339          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10340          && "Expected EXTEND dag node in input!");
10341
10342   // fold (sext c1) -> c1
10343   // fold (zext c1) -> c1
10344   // fold (aext c1) -> c1
10345   if (isa<ConstantSDNode>(N0))
10346     return DAG.getNode(Opcode, DL, VT, N0);
10347
10348   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10349   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10350   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10351   if (N0->getOpcode() == ISD::SELECT) {
10352     SDValue Op1 = N0->getOperand(1);
10353     SDValue Op2 = N0->getOperand(2);
10354     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10355         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10356       // For any_extend, choose sign extension of the constants to allow a
10357       // possible further transform to sign_extend_inreg.i.e.
10358       //
10359       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10360       // t2: i64 = any_extend t1
10361       // -->
10362       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10363       // -->
10364       // t4: i64 = sign_extend_inreg t3
10365       unsigned FoldOpc = Opcode;
10366       if (FoldOpc == ISD::ANY_EXTEND)
10367         FoldOpc = ISD::SIGN_EXTEND;
10368       return DAG.getSelect(DL, VT, N0->getOperand(0),
10369                            DAG.getNode(FoldOpc, DL, VT, Op1),
10370                            DAG.getNode(FoldOpc, DL, VT, Op2));
10371     }
10372   }
10373
10374   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10375   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10376   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10377   EVT SVT = VT.getScalarType();
10378   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10379       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10380     return SDValue();
10381
10382   // We can fold this node into a build_vector.
10383   unsigned VTBits = SVT.getSizeInBits();
10384   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10385   SmallVector<SDValue, 8> Elts;
10386   unsigned NumElts = VT.getVectorNumElements();
10387
10388   // For zero-extensions, UNDEF elements still guarantee to have the upper
10389   // bits set to zero.
10390   bool IsZext =
10391       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10392
10393   for (unsigned i = 0; i != NumElts; ++i) {
10394     SDValue Op = N0.getOperand(i);
10395     if (Op.isUndef()) {
10396       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10397       continue;
10398     }
10399
10400     SDLoc DL(Op);
10401     // Get the constant value and if needed trunc it to the size of the type.
10402     // Nodes like build_vector might have constants wider than the scalar type.
10403     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10404     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10405       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10406     else
10407       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10408   }
10409
10410   return DAG.getBuildVector(VT, DL, Elts);
10411 }
10412
10413 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10414 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10415 // transformation. Returns true if extension are possible and the above
10416 // mentioned transformation is profitable.
10417 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10418                                     unsigned ExtOpc,
10419                                     SmallVectorImpl<SDNode *> &ExtendNodes,
10420                                     const TargetLowering &TLI) {
10421   bool HasCopyToRegUses = false;
10422   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10423   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10424                             UE = N0.getNode()->use_end();
10425        UI != UE; ++UI) {
10426     SDNode *User = *UI;
10427     if (User == N)
10428       continue;
10429     if (UI.getUse().getResNo() != N0.getResNo())
10430       continue;
10431     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10432     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10433       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10434       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10435         // Sign bits will be lost after a zext.
10436         return false;
10437       bool Add = false;
10438       for (unsigned i = 0; i != 2; ++i) {
10439         SDValue UseOp = User->getOperand(i);
10440         if (UseOp == N0)
10441           continue;
10442         if (!isa<ConstantSDNode>(UseOp))
10443           return false;
10444         Add = true;
10445       }
10446       if (Add)
10447         ExtendNodes.push_back(User);
10448       continue;
10449     }
10450     // If truncates aren't free and there are users we can't
10451     // extend, it isn't worthwhile.
10452     if (!isTruncFree)
10453       return false;
10454     // Remember if this value is live-out.
10455     if (User->getOpcode() == ISD::CopyToReg)
10456       HasCopyToRegUses = true;
10457   }
10458
10459   if (HasCopyToRegUses) {
10460     bool BothLiveOut = false;
10461     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10462          UI != UE; ++UI) {
10463       SDUse &Use = UI.getUse();
10464       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10465         BothLiveOut = true;
10466         break;
10467       }
10468     }
10469     if (BothLiveOut)
10470       // Both unextended and extended values are live out. There had better be
10471       // a good reason for the transformation.
10472       return ExtendNodes.size();
10473   }
10474   return true;
10475 }
10476
10477 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10478                                   SDValue OrigLoad, SDValue ExtLoad,
10479                                   ISD::NodeType ExtType) {
10480   // Extend SetCC uses if necessary.
10481   SDLoc DL(ExtLoad);
10482   for (SDNode *SetCC : SetCCs) {
10483     SmallVector<SDValue, 4> Ops;
10484
10485     for (unsigned j = 0; j != 2; ++j) {
10486       SDValue SOp = SetCC->getOperand(j);
10487       if (SOp == OrigLoad)
10488         Ops.push_back(ExtLoad);
10489       else
10490         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10491     }
10492
10493     Ops.push_back(SetCC->getOperand(2));
10494     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10495   }
10496 }
10497
10498 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10499 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10500   SDValue N0 = N->getOperand(0);
10501   EVT DstVT = N->getValueType(0);
10502   EVT SrcVT = N0.getValueType();
10503
10504   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10505           N->getOpcode() == ISD::ZERO_EXTEND) &&
10506          "Unexpected node type (not an extend)!");
10507
10508   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10509   // For example, on a target with legal v4i32, but illegal v8i32, turn:
10510   //   (v8i32 (sext (v8i16 (load x))))
10511   // into:
10512   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
10513   //                          (v4i32 (sextload (x + 16)))))
10514   // Where uses of the original load, i.e.:
10515   //   (v8i16 (load x))
10516   // are replaced with:
10517   //   (v8i16 (truncate
10518   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
10519   //                            (v4i32 (sextload (x + 16)))))))
10520   //
10521   // This combine is only applicable to illegal, but splittable, vectors.
10522   // All legal types, and illegal non-vector types, are handled elsewhere.
10523   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10524   //
10525   if (N0->getOpcode() != ISD::LOAD)
10526     return SDValue();
10527
10528   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10529
10530   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10531       !N0.hasOneUse() || !LN0->isSimple() ||
10532       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10533       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10534     return SDValue();
10535
10536   SmallVector<SDNode *, 4> SetCCs;
10537   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10538     return SDValue();
10539
10540   ISD::LoadExtType ExtType =
10541       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10542
10543   // Try to split the vector types to get down to legal types.
10544   EVT SplitSrcVT = SrcVT;
10545   EVT SplitDstVT = DstVT;
10546   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10547          SplitSrcVT.getVectorNumElements() > 1) {
10548     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10549     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10550   }
10551
10552   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10553     return SDValue();
10554
10555   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10556
10557   SDLoc DL(N);
10558   const unsigned NumSplits =
10559       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10560   const unsigned Stride = SplitSrcVT.getStoreSize();
10561   SmallVector<SDValue, 4> Loads;
10562   SmallVector<SDValue, 4> Chains;
10563
10564   SDValue BasePtr = LN0->getBasePtr();
10565   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10566     const unsigned Offset = Idx * Stride;
10567     const Align Align = commonAlignment(LN0->getAlign(), Offset);
10568
10569     SDValue SplitLoad = DAG.getExtLoad(
10570         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10571         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10572         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10573
10574     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10575
10576     Loads.push_back(SplitLoad.getValue(0));
10577     Chains.push_back(SplitLoad.getValue(1));
10578   }
10579
10580   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10581   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10582
10583   // Simplify TF.
10584   AddToWorklist(NewChain.getNode());
10585
10586   CombineTo(N, NewValue);
10587
10588   // Replace uses of the original load (before extension)
10589   // with a truncate of the concatenated sextloaded vectors.
10590   SDValue Trunc =
10591       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10592   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10593   CombineTo(N0.getNode(), Trunc, NewChain);
10594   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10595 }
10596
10597 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10598 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10599 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10600   assert(N->getOpcode() == ISD::ZERO_EXTEND);
10601   EVT VT = N->getValueType(0);
10602   EVT OrigVT = N->getOperand(0).getValueType();
10603   if (TLI.isZExtFree(OrigVT, VT))
10604     return SDValue();
10605
10606   // and/or/xor
10607   SDValue N0 = N->getOperand(0);
10608   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10609         N0.getOpcode() == ISD::XOR) ||
10610       N0.getOperand(1).getOpcode() != ISD::Constant ||
10611       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10612     return SDValue();
10613
10614   // shl/shr
10615   SDValue N1 = N0->getOperand(0);
10616   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10617       N1.getOperand(1).getOpcode() != ISD::Constant ||
10618       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10619     return SDValue();
10620
10621   // load
10622   if (!isa<LoadSDNode>(N1.getOperand(0)))
10623     return SDValue();
10624   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10625   EVT MemVT = Load->getMemoryVT();
10626   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10627       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10628     return SDValue();
10629
10630
10631   // If the shift op is SHL, the logic op must be AND, otherwise the result
10632   // will be wrong.
10633   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10634     return SDValue();
10635
10636   if (!N0.hasOneUse() || !N1.hasOneUse())
10637     return SDValue();
10638
10639   SmallVector<SDNode*, 4> SetCCs;
10640   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10641                                ISD::ZERO_EXTEND, SetCCs, TLI))
10642     return SDValue();
10643
10644   // Actually do the transformation.
10645   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10646                                    Load->getChain(), Load->getBasePtr(),
10647                                    Load->getMemoryVT(), Load->getMemOperand());
10648
10649   SDLoc DL1(N1);
10650   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10651                               N1.getOperand(1));
10652
10653   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10654   SDLoc DL0(N0);
10655   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10656                             DAG.getConstant(Mask, DL0, VT));
10657
10658   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10659   CombineTo(N, And);
10660   if (SDValue(Load, 0).hasOneUse()) {
10661     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10662   } else {
10663     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10664                                 Load->getValueType(0), ExtLoad);
10665     CombineTo(Load, Trunc, ExtLoad.getValue(1));
10666   }
10667
10668   // N0 is dead at this point.
10669   recursivelyDeleteUnusedNodes(N0.getNode());
10670
10671   return SDValue(N,0); // Return N so it doesn't get rechecked!
10672 }
10673
10674 /// If we're narrowing or widening the result of a vector select and the final
10675 /// size is the same size as a setcc (compare) feeding the select, then try to
10676 /// apply the cast operation to the select's operands because matching vector
10677 /// sizes for a select condition and other operands should be more efficient.
10678 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10679   unsigned CastOpcode = Cast->getOpcode();
10680   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10681           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10682           CastOpcode == ISD::FP_ROUND) &&
10683          "Unexpected opcode for vector select narrowing/widening");
10684
10685   // We only do this transform before legal ops because the pattern may be
10686   // obfuscated by target-specific operations after legalization. Do not create
10687   // an illegal select op, however, because that may be difficult to lower.
10688   EVT VT = Cast->getValueType(0);
10689   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10690     return SDValue();
10691
10692   SDValue VSel = Cast->getOperand(0);
10693   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10694       VSel.getOperand(0).getOpcode() != ISD::SETCC)
10695     return SDValue();
10696
10697   // Does the setcc have the same vector size as the casted select?
10698   SDValue SetCC = VSel.getOperand(0);
10699   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10700   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10701     return SDValue();
10702
10703   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10704   SDValue A = VSel.getOperand(1);
10705   SDValue B = VSel.getOperand(2);
10706   SDValue CastA, CastB;
10707   SDLoc DL(Cast);
10708   if (CastOpcode == ISD::FP_ROUND) {
10709     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10710     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10711     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10712   } else {
10713     CastA = DAG.getNode(CastOpcode, DL, VT, A);
10714     CastB = DAG.getNode(CastOpcode, DL, VT, B);
10715   }
10716   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10717 }
10718
10719 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10720 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10721 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10722                                      const TargetLowering &TLI, EVT VT,
10723                                      bool LegalOperations, SDNode *N,
10724                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
10725   SDNode *N0Node = N0.getNode();
10726   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10727                                                    : ISD::isZEXTLoad(N0Node);
10728   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10729       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10730     return SDValue();
10731
10732   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10733   EVT MemVT = LN0->getMemoryVT();
10734   if ((LegalOperations || !LN0->isSimple() ||
10735        VT.isVector()) &&
10736       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10737     return SDValue();
10738
10739   SDValue ExtLoad =
10740       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10741                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10742   Combiner.CombineTo(N, ExtLoad);
10743   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10744   if (LN0->use_empty())
10745     Combiner.recursivelyDeleteUnusedNodes(LN0);
10746   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10747 }
10748
10749 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10750 // Only generate vector extloads when 1) they're legal, and 2) they are
10751 // deemed desirable by the target.
10752 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10753                                   const TargetLowering &TLI, EVT VT,
10754                                   bool LegalOperations, SDNode *N, SDValue N0,
10755                                   ISD::LoadExtType ExtLoadType,
10756                                   ISD::NodeType ExtOpc) {
10757   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10758       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10759       ((LegalOperations || VT.isVector() ||
10760         !cast<LoadSDNode>(N0)->isSimple()) &&
10761        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10762     return {};
10763
10764   bool DoXform = true;
10765   SmallVector<SDNode *, 4> SetCCs;
10766   if (!N0.hasOneUse())
10767     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10768   if (VT.isVector())
10769     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10770   if (!DoXform)
10771     return {};
10772
10773   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10774   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10775                                    LN0->getBasePtr(), N0.getValueType(),
10776                                    LN0->getMemOperand());
10777   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10778   // If the load value is used only by N, replace it via CombineTo N.
10779   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10780   Combiner.CombineTo(N, ExtLoad);
10781   if (NoReplaceTrunc) {
10782     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10783     Combiner.recursivelyDeleteUnusedNodes(LN0);
10784   } else {
10785     SDValue Trunc =
10786         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10787     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10788   }
10789   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10790 }
10791
10792 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10793                                         const TargetLowering &TLI, EVT VT,
10794                                         SDNode *N, SDValue N0,
10795                                         ISD::LoadExtType ExtLoadType,
10796                                         ISD::NodeType ExtOpc) {
10797   if (!N0.hasOneUse())
10798     return SDValue();
10799
10800   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10801   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10802     return SDValue();
10803
10804   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10805     return SDValue();
10806
10807   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10808     return SDValue();
10809
10810   SDLoc dl(Ld);
10811   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10812   SDValue NewLoad = DAG.getMaskedLoad(
10813       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10814       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10815       ExtLoadType, Ld->isExpandingLoad());
10816   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10817   return NewLoad;
10818 }
10819
10820 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10821                                        bool LegalOperations) {
10822   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10823           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10824
10825   SDValue SetCC = N->getOperand(0);
10826   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10827       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10828     return SDValue();
10829
10830   SDValue X = SetCC.getOperand(0);
10831   SDValue Ones = SetCC.getOperand(1);
10832   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10833   EVT VT = N->getValueType(0);
10834   EVT XVT = X.getValueType();
10835   // setge X, C is canonicalized to setgt, so we do not need to match that
10836   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10837   // not require the 'not' op.
10838   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10839     // Invert and smear/shift the sign bit:
10840     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10841     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10842     SDLoc DL(N);
10843     unsigned ShCt = VT.getSizeInBits() - 1;
10844     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10845     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10846       SDValue NotX = DAG.getNOT(DL, X, VT);
10847       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10848       auto ShiftOpcode =
10849         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10850       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10851     }
10852   }
10853   return SDValue();
10854 }
10855
10856 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10857   SDValue N0 = N->getOperand(0);
10858   if (N0.getOpcode() != ISD::SETCC)
10859     return SDValue();
10860
10861   SDValue N00 = N0.getOperand(0);
10862   SDValue N01 = N0.getOperand(1);
10863   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10864   EVT VT = N->getValueType(0);
10865   EVT N00VT = N00.getValueType();
10866   SDLoc DL(N);
10867
10868   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10869   // the same size as the compared operands. Try to optimize sext(setcc())
10870   // if this is the case.
10871   if (VT.isVector() && !LegalOperations &&
10872       TLI.getBooleanContents(N00VT) ==
10873           TargetLowering::ZeroOrNegativeOneBooleanContent) {
10874     EVT SVT = getSetCCResultType(N00VT);
10875
10876     // If we already have the desired type, don't change it.
10877     if (SVT != N0.getValueType()) {
10878       // We know that the # elements of the results is the same as the
10879       // # elements of the compare (and the # elements of the compare result
10880       // for that matter).  Check to see that they are the same size.  If so,
10881       // we know that the element size of the sext'd result matches the
10882       // element size of the compare operands.
10883       if (VT.getSizeInBits() == SVT.getSizeInBits())
10884         return DAG.getSetCC(DL, VT, N00, N01, CC);
10885
10886       // If the desired elements are smaller or larger than the source
10887       // elements, we can use a matching integer vector type and then
10888       // truncate/sign extend.
10889       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10890       if (SVT == MatchingVecType) {
10891         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10892         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10893       }
10894     }
10895
10896     // Try to eliminate the sext of a setcc by zexting the compare operands.
10897     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10898         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10899       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10900       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10901       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10902
10903       // We have an unsupported narrow vector compare op that would be legal
10904       // if extended to the destination type. See if the compare operands
10905       // can be freely extended to the destination type.
10906       auto IsFreeToExtend = [&](SDValue V) {
10907         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10908           return true;
10909         // Match a simple, non-extended load that can be converted to a
10910         // legal {z/s}ext-load.
10911         // TODO: Allow widening of an existing {z/s}ext-load?
10912         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10913               ISD::isUNINDEXEDLoad(V.getNode()) &&
10914               cast<LoadSDNode>(V)->isSimple() &&
10915               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10916           return false;
10917
10918         // Non-chain users of this value must either be the setcc in this
10919         // sequence or extends that can be folded into the new {z/s}ext-load.
10920         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10921              UI != UE; ++UI) {
10922           // Skip uses of the chain and the setcc.
10923           SDNode *User = *UI;
10924           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
10925             continue;
10926           // Extra users must have exactly the same cast we are about to create.
10927           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
10928           //       is enhanced similarly.
10929           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
10930             return false;
10931         }
10932         return true;
10933       };
10934
10935       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
10936         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
10937         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
10938         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
10939       }
10940     }
10941   }
10942
10943   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10944   // Here, T can be 1 or -1, depending on the type of the setcc and
10945   // getBooleanContents().
10946   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10947
10948   // To determine the "true" side of the select, we need to know the high bit
10949   // of the value returned by the setcc if it evaluates to true.
10950   // If the type of the setcc is i1, then the true case of the select is just
10951   // sext(i1 1), that is, -1.
10952   // If the type of the setcc is larger (say, i8) then the value of the high
10953   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10954   // of the appropriate width.
10955   SDValue ExtTrueVal = (SetCCWidth == 1)
10956                            ? DAG.getAllOnesConstant(DL, VT)
10957                            : DAG.getBoolConstant(true, DL, VT, N00VT);
10958   SDValue Zero = DAG.getConstant(0, DL, VT);
10959   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10960     return SCC;
10961
10962   if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10963     EVT SetCCVT = getSetCCResultType(N00VT);
10964     // Don't do this transform for i1 because there's a select transform
10965     // that would reverse it.
10966     // TODO: We should not do this transform at all without a target hook
10967     // because a sext is likely cheaper than a select?
10968     if (SetCCVT.getScalarSizeInBits() != 1 &&
10969         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10970       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10971       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10972     }
10973   }
10974
10975   return SDValue();
10976 }
10977
10978 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10979   SDValue N0 = N->getOperand(0);
10980   EVT VT = N->getValueType(0);
10981   SDLoc DL(N);
10982
10983   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10984     return Res;
10985
10986   // fold (sext (sext x)) -> (sext x)
10987   // fold (sext (aext x)) -> (sext x)
10988   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10989     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10990
10991   if (N0.getOpcode() == ISD::TRUNCATE) {
10992     // fold (sext (truncate (load x))) -> (sext (smaller load x))
10993     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10994     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10995       SDNode *oye = N0.getOperand(0).getNode();
10996       if (NarrowLoad.getNode() != N0.getNode()) {
10997         CombineTo(N0.getNode(), NarrowLoad);
10998         // CombineTo deleted the truncate, if needed, but not what's under it.
10999         AddToWorklist(oye);
11000       }
11001       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11002     }
11003
11004     // See if the value being truncated is already sign extended.  If so, just
11005     // eliminate the trunc/sext pair.
11006     SDValue Op = N0.getOperand(0);
11007     unsigned OpBits   = Op.getScalarValueSizeInBits();
11008     unsigned MidBits  = N0.getScalarValueSizeInBits();
11009     unsigned DestBits = VT.getScalarSizeInBits();
11010     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11011
11012     if (OpBits == DestBits) {
11013       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
11014       // bits, it is already ready.
11015       if (NumSignBits > DestBits-MidBits)
11016         return Op;
11017     } else if (OpBits < DestBits) {
11018       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
11019       // bits, just sext from i32.
11020       if (NumSignBits > OpBits-MidBits)
11021         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11022     } else {
11023       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
11024       // bits, just truncate to i32.
11025       if (NumSignBits > OpBits-MidBits)
11026         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11027     }
11028
11029     // fold (sext (truncate x)) -> (sextinreg x).
11030     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11031                                                  N0.getValueType())) {
11032       if (OpBits < DestBits)
11033         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11034       else if (OpBits > DestBits)
11035         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11036       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11037                          DAG.getValueType(N0.getValueType()));
11038     }
11039   }
11040
11041   // Try to simplify (sext (load x)).
11042   if (SDValue foldedExt =
11043           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11044                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11045     return foldedExt;
11046
11047   if (SDValue foldedExt =
11048       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11049                                ISD::SIGN_EXTEND))
11050     return foldedExt;
11051
11052   // fold (sext (load x)) to multiple smaller sextloads.
11053   // Only on illegal but splittable vectors.
11054   if (SDValue ExtLoad = CombineExtLoad(N))
11055     return ExtLoad;
11056
11057   // Try to simplify (sext (sextload x)).
11058   if (SDValue foldedExt = tryToFoldExtOfExtload(
11059           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11060     return foldedExt;
11061
11062   // fold (sext (and/or/xor (load x), cst)) ->
11063   //      (and/or/xor (sextload x), (sext cst))
11064   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11065        N0.getOpcode() == ISD::XOR) &&
11066       isa<LoadSDNode>(N0.getOperand(0)) &&
11067       N0.getOperand(1).getOpcode() == ISD::Constant &&
11068       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11069     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11070     EVT MemVT = LN00->getMemoryVT();
11071     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11072       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11073       SmallVector<SDNode*, 4> SetCCs;
11074       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11075                                              ISD::SIGN_EXTEND, SetCCs, TLI);
11076       if (DoXform) {
11077         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11078                                          LN00->getChain(), LN00->getBasePtr(),
11079                                          LN00->getMemoryVT(),
11080                                          LN00->getMemOperand());
11081         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11082         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11083                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11084         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11085         bool NoReplaceTruncAnd = !N0.hasOneUse();
11086         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11087         CombineTo(N, And);
11088         // If N0 has multiple uses, change other uses as well.
11089         if (NoReplaceTruncAnd) {
11090           SDValue TruncAnd =
11091               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11092           CombineTo(N0.getNode(), TruncAnd);
11093         }
11094         if (NoReplaceTrunc) {
11095           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11096         } else {
11097           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11098                                       LN00->getValueType(0), ExtLoad);
11099           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11100         }
11101         return SDValue(N,0); // Return N so it doesn't get rechecked!
11102       }
11103     }
11104   }
11105
11106   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11107     return V;
11108
11109   if (SDValue V = foldSextSetcc(N))
11110     return V;
11111
11112   // fold (sext x) -> (zext x) if the sign bit is known zero.
11113   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11114       DAG.SignBitIsZero(N0))
11115     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11116
11117   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11118     return NewVSel;
11119
11120   // Eliminate this sign extend by doing a negation in the destination type:
11121   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11122   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11123       isNullOrNullSplat(N0.getOperand(0)) &&
11124       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11125       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11126     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11127     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11128   }
11129   // Eliminate this sign extend by doing a decrement in the destination type:
11130   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11131   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11132       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11133       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11134       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11135     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11136     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11137   }
11138
11139   // fold sext (not i1 X) -> add (zext i1 X), -1
11140   // TODO: This could be extended to handle bool vectors.
11141   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11142       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11143                             TLI.isOperationLegal(ISD::ADD, VT)))) {
11144     // If we can eliminate the 'not', the sext form should be better
11145     if (SDValue NewXor = visitXOR(N0.getNode())) {
11146       // Returning N0 is a form of in-visit replacement that may have
11147       // invalidated N0.
11148       if (NewXor.getNode() == N0.getNode()) {
11149         // Return SDValue here as the xor should have already been replaced in
11150         // this sext.
11151         return SDValue();
11152       } else {
11153         // Return a new sext with the new xor.
11154         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11155       }
11156     }
11157
11158     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11159     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11160   }
11161
11162   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11163     return Res;
11164
11165   return SDValue();
11166 }
11167
11168 // isTruncateOf - If N is a truncate of some other value, return true, record
11169 // the value being truncated in Op and which of Op's bits are zero/one in Known.
11170 // This function computes KnownBits to avoid a duplicated call to
11171 // computeKnownBits in the caller.
11172 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11173                          KnownBits &Known) {
11174   if (N->getOpcode() == ISD::TRUNCATE) {
11175     Op = N->getOperand(0);
11176     Known = DAG.computeKnownBits(Op);
11177     return true;
11178   }
11179
11180   if (N.getOpcode() != ISD::SETCC ||
11181       N.getValueType().getScalarType() != MVT::i1 ||
11182       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11183     return false;
11184
11185   SDValue Op0 = N->getOperand(0);
11186   SDValue Op1 = N->getOperand(1);
11187   assert(Op0.getValueType() == Op1.getValueType());
11188
11189   if (isNullOrNullSplat(Op0))
11190     Op = Op1;
11191   else if (isNullOrNullSplat(Op1))
11192     Op = Op0;
11193   else
11194     return false;
11195
11196   Known = DAG.computeKnownBits(Op);
11197
11198   return (Known.Zero | 1).isAllOnesValue();
11199 }
11200
11201 /// Given an extending node with a pop-count operand, if the target does not
11202 /// support a pop-count in the narrow source type but does support it in the
11203 /// destination type, widen the pop-count to the destination type.
11204 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11205   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11206           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11207
11208   SDValue CtPop = Extend->getOperand(0);
11209   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11210     return SDValue();
11211
11212   EVT VT = Extend->getValueType(0);
11213   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11214   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11215       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11216     return SDValue();
11217
11218   // zext (ctpop X) --> ctpop (zext X)
11219   SDLoc DL(Extend);
11220   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11221   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11222 }
11223
11224 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11225   SDValue N0 = N->getOperand(0);
11226   EVT VT = N->getValueType(0);
11227
11228   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11229     return Res;
11230
11231   // fold (zext (zext x)) -> (zext x)
11232   // fold (zext (aext x)) -> (zext x)
11233   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11234     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11235                        N0.getOperand(0));
11236
11237   // fold (zext (truncate x)) -> (zext x) or
11238   //      (zext (truncate x)) -> (truncate x)
11239   // This is valid when the truncated bits of x are already zero.
11240   SDValue Op;
11241   KnownBits Known;
11242   if (isTruncateOf(DAG, N0, Op, Known)) {
11243     APInt TruncatedBits =
11244       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11245       APInt(Op.getScalarValueSizeInBits(), 0) :
11246       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11247                         N0.getScalarValueSizeInBits(),
11248                         std::min(Op.getScalarValueSizeInBits(),
11249                                  VT.getScalarSizeInBits()));
11250     if (TruncatedBits.isSubsetOf(Known.Zero))
11251       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11252   }
11253
11254   // fold (zext (truncate x)) -> (and x, mask)
11255   if (N0.getOpcode() == ISD::TRUNCATE) {
11256     // fold (zext (truncate (load x))) -> (zext (smaller load x))
11257     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11258     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11259       SDNode *oye = N0.getOperand(0).getNode();
11260       if (NarrowLoad.getNode() != N0.getNode()) {
11261         CombineTo(N0.getNode(), NarrowLoad);
11262         // CombineTo deleted the truncate, if needed, but not what's under it.
11263         AddToWorklist(oye);
11264       }
11265       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11266     }
11267
11268     EVT SrcVT = N0.getOperand(0).getValueType();
11269     EVT MinVT = N0.getValueType();
11270
11271     // Try to mask before the extension to avoid having to generate a larger mask,
11272     // possibly over several sub-vectors.
11273     if (SrcVT.bitsLT(VT) && VT.isVector()) {
11274       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11275                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11276         SDValue Op = N0.getOperand(0);
11277         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11278         AddToWorklist(Op.getNode());
11279         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11280         // Transfer the debug info; the new node is equivalent to N0.
11281         DAG.transferDbgValues(N0, ZExtOrTrunc);
11282         return ZExtOrTrunc;
11283       }
11284     }
11285
11286     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11287       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11288       AddToWorklist(Op.getNode());
11289       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11290       // We may safely transfer the debug info describing the truncate node over
11291       // to the equivalent and operation.
11292       DAG.transferDbgValues(N0, And);
11293       return And;
11294     }
11295   }
11296
11297   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11298   // if either of the casts is not free.
11299   if (N0.getOpcode() == ISD::AND &&
11300       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11301       N0.getOperand(1).getOpcode() == ISD::Constant &&
11302       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11303                            N0.getValueType()) ||
11304        !TLI.isZExtFree(N0.getValueType(), VT))) {
11305     SDValue X = N0.getOperand(0).getOperand(0);
11306     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11307     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11308     SDLoc DL(N);
11309     return DAG.getNode(ISD::AND, DL, VT,
11310                        X, DAG.getConstant(Mask, DL, VT));
11311   }
11312
11313   // Try to simplify (zext (load x)).
11314   if (SDValue foldedExt =
11315           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11316                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11317     return foldedExt;
11318
11319   if (SDValue foldedExt =
11320       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11321                                ISD::ZERO_EXTEND))
11322     return foldedExt;
11323
11324   // fold (zext (load x)) to multiple smaller zextloads.
11325   // Only on illegal but splittable vectors.
11326   if (SDValue ExtLoad = CombineExtLoad(N))
11327     return ExtLoad;
11328
11329   // fold (zext (and/or/xor (load x), cst)) ->
11330   //      (and/or/xor (zextload x), (zext cst))
11331   // Unless (and (load x) cst) will match as a zextload already and has
11332   // additional users.
11333   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11334        N0.getOpcode() == ISD::XOR) &&
11335       isa<LoadSDNode>(N0.getOperand(0)) &&
11336       N0.getOperand(1).getOpcode() == ISD::Constant &&
11337       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11338     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11339     EVT MemVT = LN00->getMemoryVT();
11340     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11341         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11342       bool DoXform = true;
11343       SmallVector<SDNode*, 4> SetCCs;
11344       if (!N0.hasOneUse()) {
11345         if (N0.getOpcode() == ISD::AND) {
11346           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11347           EVT LoadResultTy = AndC->getValueType(0);
11348           EVT ExtVT;
11349           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11350             DoXform = false;
11351         }
11352       }
11353       if (DoXform)
11354         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11355                                           ISD::ZERO_EXTEND, SetCCs, TLI);
11356       if (DoXform) {
11357         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11358                                          LN00->getChain(), LN00->getBasePtr(),
11359                                          LN00->getMemoryVT(),
11360                                          LN00->getMemOperand());
11361         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11362         SDLoc DL(N);
11363         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11364                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11365         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11366         bool NoReplaceTruncAnd = !N0.hasOneUse();
11367         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11368         CombineTo(N, And);
11369         // If N0 has multiple uses, change other uses as well.
11370         if (NoReplaceTruncAnd) {
11371           SDValue TruncAnd =
11372               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11373           CombineTo(N0.getNode(), TruncAnd);
11374         }
11375         if (NoReplaceTrunc) {
11376           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11377         } else {
11378           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11379                                       LN00->getValueType(0), ExtLoad);
11380           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11381         }
11382         return SDValue(N,0); // Return N so it doesn't get rechecked!
11383       }
11384     }
11385   }
11386
11387   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11388   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11389   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11390     return ZExtLoad;
11391
11392   // Try to simplify (zext (zextload x)).
11393   if (SDValue foldedExt = tryToFoldExtOfExtload(
11394           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11395     return foldedExt;
11396
11397   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11398     return V;
11399
11400   if (N0.getOpcode() == ISD::SETCC) {
11401     // Only do this before legalize for now.
11402     if (!LegalOperations && VT.isVector() &&
11403         N0.getValueType().getVectorElementType() == MVT::i1) {
11404       EVT N00VT = N0.getOperand(0).getValueType();
11405       if (getSetCCResultType(N00VT) == N0.getValueType())
11406         return SDValue();
11407
11408       // We know that the # elements of the results is the same as the #
11409       // elements of the compare (and the # elements of the compare result for
11410       // that matter). Check to see that they are the same size. If so, we know
11411       // that the element size of the sext'd result matches the element size of
11412       // the compare operands.
11413       SDLoc DL(N);
11414       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11415         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11416         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11417                                      N0.getOperand(1), N0.getOperand(2));
11418         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11419       }
11420
11421       // If the desired elements are smaller or larger than the source
11422       // elements we can use a matching integer vector type and then
11423       // truncate/any extend followed by zext_in_reg.
11424       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11425       SDValue VsetCC =
11426           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11427                       N0.getOperand(1), N0.getOperand(2));
11428       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11429                                     N0.getValueType());
11430     }
11431
11432     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11433     SDLoc DL(N);
11434     EVT N0VT = N0.getValueType();
11435     EVT N00VT = N0.getOperand(0).getValueType();
11436     if (SDValue SCC = SimplifySelectCC(
11437             DL, N0.getOperand(0), N0.getOperand(1),
11438             DAG.getBoolConstant(true, DL, N0VT, N00VT),
11439             DAG.getBoolConstant(false, DL, N0VT, N00VT),
11440             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11441       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11442   }
11443
11444   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
11445   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11446       isa<ConstantSDNode>(N0.getOperand(1)) &&
11447       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11448       N0.hasOneUse()) {
11449     SDValue ShAmt = N0.getOperand(1);
11450     if (N0.getOpcode() == ISD::SHL) {
11451       SDValue InnerZExt = N0.getOperand(0);
11452       // If the original shl may be shifting out bits, do not perform this
11453       // transformation.
11454       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11455         InnerZExt.getOperand(0).getValueSizeInBits();
11456       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11457         return SDValue();
11458     }
11459
11460     SDLoc DL(N);
11461
11462     // Ensure that the shift amount is wide enough for the shifted value.
11463     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11464       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11465
11466     return DAG.getNode(N0.getOpcode(), DL, VT,
11467                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11468                        ShAmt);
11469   }
11470
11471   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11472     return NewVSel;
11473
11474   if (SDValue NewCtPop = widenCtPop(N, DAG))
11475     return NewCtPop;
11476
11477   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11478     return Res;
11479
11480   return SDValue();
11481 }
11482
11483 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11484   SDValue N0 = N->getOperand(0);
11485   EVT VT = N->getValueType(0);
11486
11487   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11488     return Res;
11489
11490   // fold (aext (aext x)) -> (aext x)
11491   // fold (aext (zext x)) -> (zext x)
11492   // fold (aext (sext x)) -> (sext x)
11493   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
11494       N0.getOpcode() == ISD::ZERO_EXTEND ||
11495       N0.getOpcode() == ISD::SIGN_EXTEND)
11496     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11497
11498   // fold (aext (truncate (load x))) -> (aext (smaller load x))
11499   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11500   if (N0.getOpcode() == ISD::TRUNCATE) {
11501     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11502       SDNode *oye = N0.getOperand(0).getNode();
11503       if (NarrowLoad.getNode() != N0.getNode()) {
11504         CombineTo(N0.getNode(), NarrowLoad);
11505         // CombineTo deleted the truncate, if needed, but not what's under it.
11506         AddToWorklist(oye);
11507       }
11508       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11509     }
11510   }
11511
11512   // fold (aext (truncate x))
11513   if (N0.getOpcode() == ISD::TRUNCATE)
11514     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11515
11516   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11517   // if the trunc is not free.
11518   if (N0.getOpcode() == ISD::AND &&
11519       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11520       N0.getOperand(1).getOpcode() == ISD::Constant &&
11521       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11522                           N0.getValueType())) {
11523     SDLoc DL(N);
11524     SDValue X = N0.getOperand(0).getOperand(0);
11525     X = DAG.getAnyExtOrTrunc(X, DL, VT);
11526     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11527     return DAG.getNode(ISD::AND, DL, VT,
11528                        X, DAG.getConstant(Mask, DL, VT));
11529   }
11530
11531   // fold (aext (load x)) -> (aext (truncate (extload x)))
11532   // None of the supported targets knows how to perform load and any_ext
11533   // on vectors in one instruction, so attempt to fold to zext instead.
11534   if (VT.isVector()) {
11535     // Try to simplify (zext (load x)).
11536     if (SDValue foldedExt =
11537             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11538                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11539       return foldedExt;
11540   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11541              ISD::isUNINDEXEDLoad(N0.getNode()) &&
11542              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11543     bool DoXform = true;
11544     SmallVector<SDNode *, 4> SetCCs;
11545     if (!N0.hasOneUse())
11546       DoXform =
11547           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11548     if (DoXform) {
11549       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11550       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11551                                        LN0->getChain(), LN0->getBasePtr(),
11552                                        N0.getValueType(), LN0->getMemOperand());
11553       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11554       // If the load value is used only by N, replace it via CombineTo N.
11555       bool NoReplaceTrunc = N0.hasOneUse();
11556       CombineTo(N, ExtLoad);
11557       if (NoReplaceTrunc) {
11558         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11559         recursivelyDeleteUnusedNodes(LN0);
11560       } else {
11561         SDValue Trunc =
11562             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11563         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11564       }
11565       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11566     }
11567   }
11568
11569   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11570   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11571   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
11572   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11573       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11574     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11575     ISD::LoadExtType ExtType = LN0->getExtensionType();
11576     EVT MemVT = LN0->getMemoryVT();
11577     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11578       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11579                                        VT, LN0->getChain(), LN0->getBasePtr(),
11580                                        MemVT, LN0->getMemOperand());
11581       CombineTo(N, ExtLoad);
11582       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11583       recursivelyDeleteUnusedNodes(LN0);
11584       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11585     }
11586   }
11587
11588   if (N0.getOpcode() == ISD::SETCC) {
11589     // For vectors:
11590     // aext(setcc) -> vsetcc
11591     // aext(setcc) -> truncate(vsetcc)
11592     // aext(setcc) -> aext(vsetcc)
11593     // Only do this before legalize for now.
11594     if (VT.isVector() && !LegalOperations) {
11595       EVT N00VT = N0.getOperand(0).getValueType();
11596       if (getSetCCResultType(N00VT) == N0.getValueType())
11597         return SDValue();
11598
11599       // We know that the # elements of the results is the same as the
11600       // # elements of the compare (and the # elements of the compare result
11601       // for that matter).  Check to see that they are the same size.  If so,
11602       // we know that the element size of the sext'd result matches the
11603       // element size of the compare operands.
11604       if (VT.getSizeInBits() == N00VT.getSizeInBits())
11605         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11606                              N0.getOperand(1),
11607                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
11608
11609       // If the desired elements are smaller or larger than the source
11610       // elements we can use a matching integer vector type and then
11611       // truncate/any extend
11612       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11613       SDValue VsetCC =
11614         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11615                       N0.getOperand(1),
11616                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
11617       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11618     }
11619
11620     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11621     SDLoc DL(N);
11622     if (SDValue SCC = SimplifySelectCC(
11623             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11624             DAG.getConstant(0, DL, VT),
11625             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11626       return SCC;
11627   }
11628
11629   if (SDValue NewCtPop = widenCtPop(N, DAG))
11630     return NewCtPop;
11631
11632   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11633     return Res;
11634
11635   return SDValue();
11636 }
11637
11638 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11639   unsigned Opcode = N->getOpcode();
11640   SDValue N0 = N->getOperand(0);
11641   SDValue N1 = N->getOperand(1);
11642   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11643
11644   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11645   if (N0.getOpcode() == Opcode &&
11646       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11647     return N0;
11648
11649   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11650       N0.getOperand(0).getOpcode() == Opcode) {
11651     // We have an assert, truncate, assert sandwich. Make one stronger assert
11652     // by asserting on the smallest asserted type to the larger source type.
11653     // This eliminates the later assert:
11654     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11655     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11656     SDValue BigA = N0.getOperand(0);
11657     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11658     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11659            "Asserting zero/sign-extended bits to a type larger than the "
11660            "truncated destination does not provide information");
11661
11662     SDLoc DL(N);
11663     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11664     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11665     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11666                                     BigA.getOperand(0), MinAssertVTVal);
11667     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11668   }
11669
11670   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11671   // than X. Just move the AssertZext in front of the truncate and drop the
11672   // AssertSExt.
11673   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11674       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11675       Opcode == ISD::AssertZext) {
11676     SDValue BigA = N0.getOperand(0);
11677     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11678     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11679            "Asserting zero/sign-extended bits to a type larger than the "
11680            "truncated destination does not provide information");
11681
11682     if (AssertVT.bitsLT(BigA_AssertVT)) {
11683       SDLoc DL(N);
11684       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11685                                       BigA.getOperand(0), N1);
11686       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11687     }
11688   }
11689
11690   return SDValue();
11691 }
11692
11693 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11694   SDLoc DL(N);
11695
11696   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11697   SDValue N0 = N->getOperand(0);
11698
11699   // Fold (assertalign (assertalign x, AL0), AL1) ->
11700   // (assertalign x, max(AL0, AL1))
11701   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11702     return DAG.getAssertAlign(DL, N0.getOperand(0),
11703                               std::max(AL, AAN->getAlign()));
11704
11705   // In rare cases, there are trivial arithmetic ops in source operands. Sink
11706   // this assert down to source operands so that those arithmetic ops could be
11707   // exposed to the DAG combining.
11708   switch (N0.getOpcode()) {
11709   default:
11710     break;
11711   case ISD::ADD:
11712   case ISD::SUB: {
11713     unsigned AlignShift = Log2(AL);
11714     SDValue LHS = N0.getOperand(0);
11715     SDValue RHS = N0.getOperand(1);
11716     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11717     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11718     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11719       if (LHSAlignShift < AlignShift)
11720         LHS = DAG.getAssertAlign(DL, LHS, AL);
11721       if (RHSAlignShift < AlignShift)
11722         RHS = DAG.getAssertAlign(DL, RHS, AL);
11723       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11724     }
11725     break;
11726   }
11727   }
11728
11729   return SDValue();
11730 }
11731
11732 /// If the result of a wider load is shifted right by N bits and then
11733 /// truncated to a narrower type, and N is a multiple of the number of bits of
11734 /// the narrower type, transform it to a narrower load from the original
11735 /// address plus a byte offset. Also narrow the load if the result is masked
11736 /// with an AND to effectively produce a smaller type. If the result is to be
11737 /// extended, also fold the extension to form an extending load.
      ///
      /// \param N the node whose operand chain is searched for a load. The opcodes
      /// SIGN_EXTEND_INREG, SRL and AND (with a constant mask) get dedicated
      /// handling below; for any other opcode the narrow type is simply N's result
      /// type and a non-extending load is produced.
      /// \returns the value replacing N (the new load, possibly wrapped in a SHL),
      /// or an empty SDValue when no narrowing is performed.
11738 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11739   unsigned Opc = N->getOpcode();
11740
11741   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11742   SDValue N0 = N->getOperand(0);
11743   EVT VT = N->getValueType(0);
11744   EVT ExtVT = VT;
11745
11746   // This transformation isn't valid for vector loads.
11747   if (VT.isVector())
11748     return SDValue();
11749
        // ShAmt is the bit offset of the wanted value inside the loaded value; it
        // is later converted into the byte offset of the narrow load.
11750   unsigned ShAmt = 0;
        // Set only for an AND with a shifted (non-low) mask: the narrow load then
        // has to be shifted back into position afterwards.
11751   bool HasShiftedOffset = false;
11752   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11753   // extended to VT.
11754   if (Opc == ISD::SIGN_EXTEND_INREG) {
11755     ExtType = ISD::SEXTLOAD;
11756     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11757   } else if (Opc == ISD::SRL) {
11758     // Another special-case: SRL is basically zero-extending a narrower value,
11759     // or it may be shifting a higher subword, half or byte into the lowest
11760     // bits.
11761     ExtType = ISD::ZEXTLOAD;
          // Make N itself the SRL that the generic SRL handling further down
          // inspects.
11762     N0 = SDValue(N, 0);
11763
11764     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11765     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11766     if (!N01 || !LN0)
11767       return SDValue();
11768
          // The narrow type is what remains after the shift: counted from the
          // memory width when the load is not sign-extending and the shift stays
          // inside the loaded memory, otherwise counted from the result width.
11769     uint64_t ShiftAmt = N01->getZExtValue();
11770     uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11771     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11772       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11773     else
11774       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11775                                 VT.getScalarSizeInBits() - ShiftAmt);
11776   } else if (Opc == ISD::AND) {
11777     // An AND with a constant mask is the same as a truncate + zero-extend.
11778     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11779     if (!AndC)
11780       return SDValue();
11781
11782     const APInt &Mask = AndC->getAPIntValue();
11783     unsigned ActiveBits = 0;
11784     if (Mask.isMask()) {
            // Low-bit mask: the run of trailing ones is the narrow width.
11785       ActiveBits = Mask.countTrailingOnes();
11786     } else if (Mask.isShiftedMask()) {
            // Shifted mask: remember where the run of ones starts (ShAmt) and
            // how long it is (ActiveBits).
11787       ShAmt = Mask.countTrailingZeros();
11788       APInt ShiftedMask = Mask.lshr(ShAmt);
11789       ActiveBits = ShiftedMask.countTrailingOnes();
11790       HasShiftedOffset = true;
11791     } else
11792       return SDValue();
11793
11794     ExtType = ISD::ZEXTLOAD;
11795     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11796   }
11797
        // Look through a single-use SRL by a constant sitting between N and the
        // load. Note that this overwrites any ShAmt computed above.
11798   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11799     SDValue SRL = N0;
11800     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11801       ShAmt = ConstShift->getZExtValue();
11802       unsigned EVTBits = ExtVT.getScalarSizeInBits();
11803       // Is the shift amount a multiple of size of VT?
            // NOTE(review): the "& (EVTBits - 1)" tests are only a multiple-of
            // check when EVTBits is a power of two - confirm that holds here.
11804       if ((ShAmt & (EVTBits-1)) == 0) {
11805         N0 = N0.getOperand(0);
11806         // Is the load width a multiple of size of VT?
11807         if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11808           return SDValue();
11809       }
11810
11811       // At this point, we must have a load or else we can't do the transform.
            // (If the multiple-of test above failed, N0 is still the SRL and this
            // cast fails, so the function bails out.)
11812       auto *LN0 = dyn_cast<LoadSDNode>(N0);
11813       if (!LN0) return SDValue();
11814
11815       // Because a SRL must be assumed to *need* to zero-extend the high bits
11816       // (as opposed to anyext the high bits), we can't combine the zextload
11817       // lowering of SRL and an sextload.
11818       if (LN0->getExtensionType() == ISD::SEXTLOAD)
11819         return SDValue();
11820
11821       // If the shift amount is larger than the input type then we're not
11822       // accessing any of the loaded bytes.  If the load was a zextload/extload
11823       // then the result of the shift+trunc is zero/undef (handled elsewhere).
11824       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11825         return SDValue();
11826
11827       // If the SRL is only used by a masking AND, we may be able to adjust
11828       // the ExtVT to make the AND redundant.
            // (The SRL was checked to have exactly one use above, so the first
            // use is the only one.)
11829       SDNode *Mask = *(SRL->use_begin());
11830       if (Mask->getOpcode() == ISD::AND &&
11831           isa<ConstantSDNode>(Mask->getOperand(1))) {
11832         const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11833         if (ShiftMask.isMask()) {
11834           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11835                                            ShiftMask.countTrailingOnes());
11836           // If the mask is smaller, recompute the type.
11837           if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11838               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11839             ExtVT = MaskedVT;
11840         }
11841       }
11842     }
11843   }
11844
11845   // If the load is shifted left (and the result isn't shifted back right),
11846   // we can fold the truncate through the shift.
11847   unsigned ShLeftAmt = 0;
11848   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11849       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11850     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
            // Remember the left shift so it can be re-applied to the narrow
            // load, and continue with the value being shifted.
11851       ShLeftAmt = N01->getZExtValue();
11852       N0 = N0.getOperand(0);
11853     }
11854   }
11855
11856   // If we haven't found a load, we can't narrow it.
11857   if (!isa<LoadSDNode>(N0))
11858     return SDValue();
11859
11860   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11861   // Reducing the width of a volatile load is illegal.  For atomics, we may be
11862   // able to reduce the width provided we never widen again. (see D66309)
11863   if (!LN0->isSimple() ||
11864       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11865     return SDValue();
11866
        // Maps a bit offset to (store size of the original memory type - store
        // size of ExtVT - offset). Applying it a second time yields the original
        // offset again, which the HasShiftedOffset path below relies on. The
        // parameter shadows the outer ShAmt.
11867   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11868     unsigned LVTStoreBits =
11869         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11870     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11871     return LVTStoreBits - EVTStoreBits - ShAmt;
11872   };
11873
11874   // For big endian targets, we need to adjust the offset to the pointer to
11875   // load the correct bytes.
11876   if (DAG.getDataLayout().isBigEndian())
11877     ShAmt = AdjustBigEndianShift(ShAmt);
11878
        // Convert the bit offset into a byte offset from the original base
        // pointer and derive the alignment that still holds at that offset.
11879   uint64_t PtrOff = ShAmt / 8;
11880   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11881   SDLoc DL(LN0);
11882   // The original load itself didn't wrap, so an offset within it doesn't.
11883   SDNodeFlags Flags;
11884   Flags.setNoUnsignedWrap(true);
11885   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11886                                             TypeSize::Fixed(PtrOff), DL, Flags);
11887   AddToWorklist(NewPtr.getNode());
11888
        // Build the narrow load on the old load's chain: a plain load of VT when
        // no extension is involved, otherwise an extending load from ExtVT to VT.
11889   SDValue Load;
11890   if (ExtType == ISD::NON_EXTLOAD)
11891     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11892                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11893                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11894   else
11895     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11896                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11897                           NewAlign, LN0->getMemOperand()->getFlags(),
11898                           LN0->getAAInfo());
11899
11900   // Replace the old load's chain with the new load's chain.
11901   WorklistRemover DeadNodes(*this);
11902   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11903
11904   // Shift the result left, if we've swallowed a left shift.
11905   SDValue Result = Load;
11906   if (ShLeftAmt != 0) {
          // Fall back to VT for the shift-amount constant when the amount does
          // not fit in the preferred shift-amount type.
11907     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11908     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11909       ShImmTy = VT;
11910     // If the shift amount is as large as the result size (but, presumably,
11911     // no larger than the source) then the useful bits of the result are
11912     // zero; we can't simply return the shortened shift, because the result
11913     // of that operation is undefined.
11914     if (ShLeftAmt >= VT.getScalarSizeInBits())
11915       Result = DAG.getConstant(0, DL, VT);
11916     else
11917       Result = DAG.getNode(ISD::SHL, DL, VT,
11918                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11919   }
11920
11921   if (HasShiftedOffset) {
11922     // Recalculate the shift amount after it has been altered to calculate
11923     // the offset.
11924     if (DAG.getDataLayout().isBigEndian())
11925       ShAmt = AdjustBigEndianShift(ShAmt);
11926
11927     // We're using a shifted mask, so the load now has an offset. This means
11928     // that data has been loaded into the lower bytes than it would have been
11929     // before, so we need to shl the loaded data into the correct position in the
11930     // register.
11931     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11932     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
          // In this path N's uses are rewritten here directly, in addition to
          // Result being returned to the caller.
11933     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11934   }
11935
11936   // Return the new loaded value.
11937   return Result;
11938 }
11939
11940 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11941   SDValue N0 = N->getOperand(0);
11942   SDValue N1 = N->getOperand(1);
11943   EVT VT = N->getValueType(0);
11944   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11945   unsigned VTBits = VT.getScalarSizeInBits();
11946   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11947
11948   // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11949   if (N0.isUndef())
11950     return DAG.getConstant(0, SDLoc(N), VT);
11951
11952   // fold (sext_in_reg c1) -> c1
11953   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11954     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11955
11956   // If the input is already sign extended, just drop the extension.
11957   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11958     return N0;
11959
11960   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11961   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11962       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11963     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11964                        N1);
11965
11966   // fold (sext_in_reg (sext x)) -> (sext x)
11967   // fold (sext_in_reg (aext x)) -> (sext x)
11968   // if x is small enough or if we know that x has more than 1 sign bit and the
11969   // sign_extend_inreg is extending from one of them.
11970   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11971     SDValue N00 = N0.getOperand(0);
11972     unsigned N00Bits = N00.getScalarValueSizeInBits();
11973     if ((N00Bits <= ExtVTBits ||
11974          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11975         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11976       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11977   }
11978
11979   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11980   // if x is small enough or if we know that x has more than 1 sign bit and the
11981   // sign_extend_inreg is extending from one of them.
11982   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11983       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11984       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
11985     SDValue N00 = N0.getOperand(0);
11986     unsigned N00Bits = N00.getScalarValueSizeInBits();
11987     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
11988     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
11989     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
11990     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
11991     if ((N00Bits == ExtVTBits ||
11992          (!IsZext && (N00Bits < ExtVTBits ||
11993                       (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
11994                           ExtVTBits))) &&
11995         (!LegalOperations ||
11996          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
11997       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
11998   }
11999
12000   // fold (sext_in_reg (zext x)) -> (sext x)
12001   // iff we are extending the source sign bit.
12002   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12003     SDValue N00 = N0.getOperand(0);
12004     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12005         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12006       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12007   }
12008
12009   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12010   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12011     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12012
12013   // fold operands of sext_in_reg based on knowledge that the top bits are not
12014   // demanded.
12015   if (SimplifyDemandedBits(SDValue(N, 0)))
12016     return SDValue(N, 0);
12017
12018   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12019   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12020   if (SDValue NarrowLoad = ReduceLoadWidth(N))
12021     return NarrowLoad;
12022
12023   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12024   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12025   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12026   if (N0.getOpcode() == ISD::SRL) {
12027     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12028       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12029         // We can turn this into an SRA iff the input to the SRL is already sign
12030         // extended enough.
12031         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12032         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12033           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12034                              N0.getOperand(1));
12035       }
12036   }
12037
12038   // fold (sext_inreg (extload x)) -> (sextload x)
12039   // If sextload is not supported by target, we can only do the combine when
12040   // load has one use. Doing otherwise can block folding the extload with other
12041   // extends that the target does support.
12042   if (ISD::isEXTLoad(N0.getNode()) &&
12043       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12044       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12045       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12046         N0.hasOneUse()) ||
12047        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12048     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12049     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12050                                      LN0->getChain(),
12051                                      LN0->getBasePtr(), ExtVT,
12052                                      LN0->getMemOperand());
12053     CombineTo(N, ExtLoad);
12054     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12055     AddToWorklist(ExtLoad.getNode());
12056     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12057   }
12058
12059   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12060   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12061       N0.hasOneUse() &&
12062       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12063       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12064        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12065     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12066     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12067                                      LN0->getChain(),
12068                                      LN0->getBasePtr(), ExtVT,
12069                                      LN0->getMemOperand());
12070     CombineTo(N, ExtLoad);
12071     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12072     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12073   }
12074
12075   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12076   // ignore it if the masked load is already sign extended
12077   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12078     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12079         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12080         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12081       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12082           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12083           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12084           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12085       CombineTo(N, ExtMaskedLoad);
12086       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12087       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12088     }
12089   }
12090
12091   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12092   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12093     if (SDValue(GN0, 0).hasOneUse() &&
12094         ExtVT == GN0->getMemoryVT() &&
12095         TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12096       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
12097                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
12098
12099       SDValue ExtLoad = DAG.getMaskedGather(
12100           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12101           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12102
12103       CombineTo(N, ExtLoad);
12104       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12105       AddToWorklist(ExtLoad.getNode());
12106       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12107     }
12108   }
12109
12110   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12111   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12112     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12113                                            N0.getOperand(1), false))
12114       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12115   }
12116
12117   return SDValue();
12118 }
12119
12120 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12121   SDValue N0 = N->getOperand(0);
12122   EVT VT = N->getValueType(0);
12123
12124   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12125   if (N0.isUndef())
12126     return DAG.getConstant(0, SDLoc(N), VT);
12127
12128   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12129     return Res;
12130
12131   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12132     return SDValue(N, 0);
12133
12134   return SDValue();
12135 }
12136
12137 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12138   SDValue N0 = N->getOperand(0);
12139   EVT VT = N->getValueType(0);
12140   EVT SrcVT = N0.getValueType();
12141   bool isLE = DAG.getDataLayout().isLittleEndian();
12142
12143   // noop truncate
12144   if (SrcVT == VT)
12145     return N0;
12146
12147   // fold (truncate (truncate x)) -> (truncate x)
12148   if (N0.getOpcode() == ISD::TRUNCATE)
12149     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12150
12151   // fold (truncate c1) -> c1
12152   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12153     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12154     if (C.getNode() != N)
12155       return C;
12156   }
12157
12158   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12159   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12160       N0.getOpcode() == ISD::SIGN_EXTEND ||
12161       N0.getOpcode() == ISD::ANY_EXTEND) {
12162     // if the source is smaller than the dest, we still need an extend.
12163     if (N0.getOperand(0).getValueType().bitsLT(VT))
12164       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12165     // if the source is larger than the dest, than we just need the truncate.
12166     if (N0.getOperand(0).getValueType().bitsGT(VT))
12167       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12168     // if the source and dest are the same type, we can drop both the extend
12169     // and the truncate.
12170     return N0.getOperand(0);
12171   }
12172
12173   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12174   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12175     return SDValue();
12176
12177   // Fold extract-and-trunc into a narrow extract. For example:
12178   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12179   //   i32 y = TRUNCATE(i64 x)
12180   //        -- becomes --
12181   //   v16i8 b = BITCAST (v2i64 val)
12182   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12183   //
12184   // Note: We only run this optimization after type legalization (which often
12185   // creates this pattern) and before operation legalization after which
12186   // we need to be more careful about the vector instructions that we generate.
12187   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12188       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12189     EVT VecTy = N0.getOperand(0).getValueType();
12190     EVT ExTy = N0.getValueType();
12191     EVT TrTy = N->getValueType(0);
12192
12193     auto EltCnt = VecTy.getVectorElementCount();
12194     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12195     auto NewEltCnt = EltCnt * SizeRatio;
12196
12197     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12198     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12199
12200     SDValue EltNo = N0->getOperand(1);
12201     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12202       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12203       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12204
12205       SDLoc DL(N);
12206       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12207                          DAG.getBitcast(NVT, N0.getOperand(0)),
12208                          DAG.getVectorIdxConstant(Index, DL));
12209     }
12210   }
12211
12212   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12213   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12214     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12215         TLI.isTruncateFree(SrcVT, VT)) {
12216       SDLoc SL(N0);
12217       SDValue Cond = N0.getOperand(0);
12218       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12219       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12220       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12221     }
12222   }
12223
12224   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12225   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12226       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12227       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12228     SDValue Amt = N0.getOperand(1);
12229     KnownBits Known = DAG.computeKnownBits(Amt);
12230     unsigned Size = VT.getScalarSizeInBits();
12231     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12232       SDLoc SL(N);
12233       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12234
12235       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12236       if (AmtVT != Amt.getValueType()) {
12237         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12238         AddToWorklist(Amt.getNode());
12239       }
12240       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12241     }
12242   }
12243
12244   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12245     return V;
12246
12247   // Attempt to pre-truncate BUILD_VECTOR sources.
12248   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12249       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12250       // Avoid creating illegal types if running after type legalizer.
12251       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12252     SDLoc DL(N);
12253     EVT SVT = VT.getScalarType();
12254     SmallVector<SDValue, 8> TruncOps;
12255     for (const SDValue &Op : N0->op_values()) {
12256       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12257       TruncOps.push_back(TruncOp);
12258     }
12259     return DAG.getBuildVector(VT, DL, TruncOps);
12260   }
12261
12262   // Fold a series of buildvector, bitcast, and truncate if possible.
12263   // For example fold
12264   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12265   //   (2xi32 (buildvector x, y)).
12266   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12267       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12268       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12269       N0.getOperand(0).hasOneUse()) {
12270     SDValue BuildVect = N0.getOperand(0);
12271     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12272     EVT TruncVecEltTy = VT.getVectorElementType();
12273
12274     // Check that the element types match.
12275     if (BuildVectEltTy == TruncVecEltTy) {
12276       // Now we only need to compute the offset of the truncated elements.
12277       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
12278       unsigned TruncVecNumElts = VT.getVectorNumElements();
12279       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12280
12281       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12282              "Invalid number of elements");
12283
12284       SmallVector<SDValue, 8> Opnds;
12285       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12286         Opnds.push_back(BuildVect.getOperand(i));
12287
12288       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12289     }
12290   }
12291
12292   // See if we can simplify the input to this truncate through knowledge that
12293   // only the low bits are being used.
12294   // For example "trunc (or (shl x, 8), y)" // -> trunc y
12295   // Currently we only perform this optimization on scalars because vectors
12296   // may have different active low bits.
12297   if (!VT.isVector()) {
12298     APInt Mask =
12299         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12300     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12301       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12302   }
12303
12304   // fold (truncate (load x)) -> (smaller load x)
12305   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12306   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12307     if (SDValue Reduced = ReduceLoadWidth(N))
12308       return Reduced;
12309
12310     // Handle the case where the load remains an extending load even
12311     // after truncation.
12312     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12313       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12314       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12315         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12316                                          VT, LN0->getChain(), LN0->getBasePtr(),
12317                                          LN0->getMemoryVT(),
12318                                          LN0->getMemOperand());
12319         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12320         return NewLoad;
12321       }
12322     }
12323   }
12324
12325   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12326   // where ... are all 'undef'.
12327   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12328     SmallVector<EVT, 8> VTs;
12329     SDValue V;
12330     unsigned Idx = 0;
12331     unsigned NumDefs = 0;
12332
12333     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12334       SDValue X = N0.getOperand(i);
12335       if (!X.isUndef()) {
12336         V = X;
12337         Idx = i;
12338         NumDefs++;
12339       }
12340       // Stop if more than one members are non-undef.
12341       if (NumDefs > 1)
12342         break;
12343
12344       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12345                                      VT.getVectorElementType(),
12346                                      X.getValueType().getVectorElementCount()));
12347     }
12348
12349     if (NumDefs == 0)
12350       return DAG.getUNDEF(VT);
12351
12352     if (NumDefs == 1) {
12353       assert(V.getNode() && "The single defined operand is empty!");
12354       SmallVector<SDValue, 8> Opnds;
12355       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12356         if (i != Idx) {
12357           Opnds.push_back(DAG.getUNDEF(VTs[i]));
12358           continue;
12359         }
12360         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12361         AddToWorklist(NV.getNode());
12362         Opnds.push_back(NV);
12363       }
12364       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12365     }
12366   }
12367
12368   // Fold truncate of a bitcast of a vector to an extract of the low vector
12369   // element.
12370   //
12371   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12372   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12373     SDValue VecSrc = N0.getOperand(0);
12374     EVT VecSrcVT = VecSrc.getValueType();
12375     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12376         (!LegalOperations ||
12377          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12378       SDLoc SL(N);
12379
12380       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12381       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12382                          DAG.getVectorIdxConstant(Idx, SL));
12383     }
12384   }
12385
12386   // Simplify the operands using demanded-bits information.
12387   if (SimplifyDemandedBits(SDValue(N, 0)))
12388     return SDValue(N, 0);
12389
12390   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12391   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12392   // When the adde's carry is not used.
12393   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12394       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12395       // We only do for addcarry before legalize operation
12396       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12397        TLI.isOperationLegal(N0.getOpcode(), VT))) {
12398     SDLoc SL(N);
12399     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12400     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12401     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12402     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12403   }
12404
12405   // fold (truncate (extract_subvector(ext x))) ->
12406   //      (extract_subvector x)
12407   // TODO: This can be generalized to cover cases where the truncate and extract
12408   // do not fully cancel each other out.
12409   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12410     SDValue N00 = N0.getOperand(0);
12411     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12412         N00.getOpcode() == ISD::ZERO_EXTEND ||
12413         N00.getOpcode() == ISD::ANY_EXTEND) {
12414       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12415           VT.getVectorElementType())
12416         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12417                            N00.getOperand(0), N0.getOperand(1));
12418     }
12419   }
12420
12421   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12422     return NewVSel;
12423
12424   // Narrow a suitable binary operation with a non-opaque constant operand by
12425   // moving it ahead of the truncate. This is limited to pre-legalization
12426   // because targets may prefer a wider type during later combines and invert
12427   // this transform.
12428   switch (N0.getOpcode()) {
12429   case ISD::ADD:
12430   case ISD::SUB:
12431   case ISD::MUL:
12432   case ISD::AND:
12433   case ISD::OR:
12434   case ISD::XOR:
12435     if (!LegalOperations && N0.hasOneUse() &&
12436         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12437          isConstantOrConstantVector(N0.getOperand(1), true))) {
12438       // TODO: We already restricted this to pre-legalization, but for vectors
12439       // we are extra cautious to not create an unsupported operation.
12440       // Target-specific changes are likely needed to avoid regressions here.
12441       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12442         SDLoc DL(N);
12443         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12444         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12445         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12446       }
12447     }
12448     break;
12449   case ISD::USUBSAT:
12450     // Truncate the USUBSAT only if LHS is a known zero-extension. It's not
12451     // enough to know that the upper bits are zero; we must also ensure that
12452     // we don't introduce an extra truncate.
12453     if (!LegalOperations && N0.hasOneUse() &&
12454         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12455         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12456             VT.getScalarSizeInBits() &&
12457         hasOperation(N0.getOpcode(), VT)) {
12458       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12459                                  DAG, SDLoc(N));
12460     }
12461     break;
12462   }
12463
12464   return SDValue();
12465 }
12466
/// Return the node that supplies operand i of N. If that operand is a
/// MERGE_VALUES node, look through it and return the node of the merged
/// operand selected by the operand's own result number.
12467 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12468   SDValue Elt = N->getOperand(i);
12469   if (Elt.getOpcode() != ISD::MERGE_VALUES)
12470     return Elt.getNode();
  // Elt refers to result #ResNo of the MERGE_VALUES node; that result is fed
  // by the MERGE_VALUES operand with the same index.
12471   return Elt.getOperand(Elt.getResNo()).getNode();
12472 }
12473
12474 /// build_pair (load, load) -> load
12475 /// if the load locations are consecutive.
///
/// N must be a BUILD_PAIR (asserted) and VT is the type of the combined load.
/// Returns the new load of type VT, or an empty SDValue when the two elements
/// cannot be merged.
12476 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12477   assert(N->getOpcode() == ISD::BUILD_PAIR);
12478
  // dyn_cast yields null for an element that is not a load; that case is
  // rejected by the null checks below.
12479   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12480   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12481
12482   // A BUILD_PAIR always has the least significant part in elt 0 and the most
12483   // significant part in elt 1. So when combining into one large load, we
12484   // need to consider the endianness.
12485   if (DAG.getDataLayout().isBigEndian())
12486     std::swap(LD1, LD2);
12487
  // Bail out unless both elements are loads in the same address space and LD1
  // passes the isNON_EXTLoad check and has a single use.
12488   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
12489       LD1->getAddressSpace() != LD2->getAddressSpace())
12490     return SDValue();
12491   EVT LD1VT = LD1->getValueType(0);
12492   unsigned LD1Bytes = LD1VT.getStoreSize();
  // LD2 must pass the same load checks, and the DAG must accept the pair in
  // areNonVolatileConsecutiveLoads, which is given LD1's store size in bytes.
12493   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12494       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12495     Align Alignment = LD1->getAlign();
12496     Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12497         VT.getTypeForEVT(*DAG.getContext()));
12498
    // Only form the wide load if LD1's alignment already satisfies the ABI
    // alignment of VT and, when LegalOperations is set, a load of VT is legal.
    // The new load reuses LD1's chain, base pointer, pointer info and
    // alignment.
12499     if (NewAlign <= Alignment &&
12500         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12501       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12502                          LD1->getPointerInfo(), Alignment);
12503   }
12504
12505   return SDValue();
12506 }
12507
/// Return the element index (1 for a big-endian data layout, 0 otherwise) that
/// visitBITCAST passes to EXTRACT_ELEMENT to pick the Hi part of a ppcf128
/// value that has been bitcast to an integer.
12508 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12509   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12510   // and Lo parts; on big-endian machines it doesn't.
12511   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12512 }
12513
/// Try to turn a bitcast-to-FP of an integer logic op, whose first operand is
/// itself a bitcast from the same FP type, back into the matching FP sign
/// operation:
///   and with ~signmask -> fabs
///   xor with  signmask -> fneg
///   or  with  signmask -> fneg (fabs)
/// N is the outer bitcast node. Returns the new FP node (and bumps the
/// NumFPLogicOpsConv counter), or an empty SDValue if the pattern does not
/// match or the target check fails.
12514 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12515                                     const TargetLowering &TLI) {
12516   // If this is not a bitcast to an FP type or if the target doesn't report
12517   // bit-preserving FP logic for that type, we're done.
12518   EVT VT = N->getValueType(0);
12519   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12520     return SDValue();
12521
12522   // TODO: Handle cases where the integer constant is a different scalar
12523   // bitwidth to the FP.
12524   SDValue N0 = N->getOperand(0);
12525   EVT SourceVT = N0.getValueType();
12526   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12527     return SDValue();
12528
  // Pick the FP opcode and the exact constant the integer logic op must use
  // for it to be equivalent to that FP operation.
12529   unsigned FPOpcode;
12530   APInt SignMask;
12531   switch (N0.getOpcode()) {
12532   case ISD::AND:
12533     FPOpcode = ISD::FABS;
12534     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12535     break;
12536   case ISD::XOR:
12537     FPOpcode = ISD::FNEG;
12538     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12539     break;
12540   case ISD::OR:
    // OR with the sign mask is built as FABS here; the FNEG is wrapped around
    // it when the result is created below.
12541     FPOpcode = ISD::FABS;
12542     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12543     break;
12544   default:
12545     return SDValue();
12546   }
12547
12548   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12549   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12550   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12551   //   fneg (fabs X)
12552   SDValue LogicOp0 = N0.getOperand(0);
12553   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
  // Require the constant (or splat) to equal the mask exactly, and the other
  // operand to be a bitcast whose source already has the result type VT.
12554   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12555       LogicOp0.getOpcode() == ISD::BITCAST &&
12556       LogicOp0.getOperand(0).getValueType() == VT) {
12557     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12558     NumFPLogicOpsConv++;
12559     if (N0.getOpcode() == ISD::OR)
12560       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12561     return FPOp;
12562   }
12563
12564   return SDValue();
12565 }
12566
/// Combine a BITCAST node N. The folds are tried in this order: undef input,
/// constant BUILD_VECTOR and scalar constant inputs, nested bitcasts, retyping
/// a feeding load, integer logic ops that implement fabs/fneg, lowering
/// fneg/fabs/fcopysign to integer logic, merging a BUILD_PAIR of consecutive
/// loads, and looking through bitcasts around a vector shuffle.
/// Returns the replacement value, or an empty SDValue if no fold applied.
12567 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12568   SDValue N0 = N->getOperand(0);
12569   EVT VT = N->getValueType(0);
12570
12571   if (N0.isUndef())
12572     return DAG.getUNDEF(VT);
12573
12574   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12575   // Only do this before legalize types, unless both types are integer and the
12576   // scalar type is legal. Only do this before legalize ops, since the target
12577   // may be depending on the bitcast.
12578   // First check to see if this is all constant.
12579   // TODO: Support FP bitcasts after legalize types.
12580   if (VT.isVector() &&
12581       (!LegalTypes ||
12582        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12583         TLI.isTypeLegal(VT.getVectorElementType()))) &&
12584       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12585       cast<BuildVectorSDNode>(N0)->isConstant())
12586     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12587                                              VT.getVectorElementType());
12588
12589   // If the input is a constant, let getNode fold it.
12590   if (isIntOrFPConstant(N0)) {
12591     // If we can't allow illegal operations, we need to check that this is just
12592     // an fp -> int or int -> fp conversion and that the resulting operation
12593     // will be legal.
12594     if (!LegalOperations ||
12595         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12596          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12597         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12598          TLI.isOperationLegal(ISD::Constant, VT))) {
12599       SDValue C = DAG.getBitcast(VT, N0);
      // Only report a change if getBitcast produced something other than N.
12600       if (C.getNode() != N)
12601         return C;
12602     }
12603   }
12604
12605   // (conv (conv x, t1), t2) -> (conv x, t2)
12606   if (N0.getOpcode() == ISD::BITCAST)
12607     return DAG.getBitcast(VT, N0.getOperand(0));
12608
12609   // fold (conv (load x)) -> (load (conv*)x)
12610   // If the resultant load doesn't need a higher alignment than the original!
12611   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12612       // Do not remove the cast if the types differ in endian layout.
12613       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12614           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12615       // If the load is volatile, we only want to change the load type if the
12616       // resulting load is legal. Otherwise we might increase the number of
12617       // memory accesses. We don't care if the original type was legal or not
12618       // as we assume software couldn't rely on the number of accesses of an
12619       // illegal type.
12620       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12621        TLI.isOperationLegal(ISD::LOAD, VT))) {
12622     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12623
12624     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12625                                     *LN0->getMemOperand())) {
      // The new load keeps LN0's chain, base pointer, pointer info, alignment,
      // memory-operand flags and AA info. Users of result 1 of the old load
      // are redirected to result 1 of the new load.
12626       SDValue Load =
12627           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12628                       LN0->getPointerInfo(), LN0->getAlign(),
12629                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12630       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12631       return Load;
12632     }
12633   }
12634
12635   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12636     return V;
12637
12638   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12639   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12640   //
12641   // For ppc_fp128:
12642   // fold (bitcast (fneg x)) ->
12643   //     flipbit = signbit
12644   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12645   //
12646   // fold (bitcast (fabs x)) ->
12647   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
12648   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12649   // This often reduces constant pool loads.
12650   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12651        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12652       N0.getNode()->hasOneUse() && VT.isInteger() &&
12653       !VT.isVector() && !N0.getValueType().isVector()) {
12654     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12655     AddToWorklist(NewConv.getNode());
12656
12657     SDLoc DL(N);
12658     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12659       assert(VT.getSizeInBits() == 128);
      // The sign mask is built for half of VT's width, as an i64 constant.
12660       SDValue SignBit = DAG.getConstant(
12661           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12662       SDValue FlipBit;
12663       if (N0.getOpcode() == ISD::FNEG) {
12664         FlipBit = SignBit;
12665         AddToWorklist(FlipBit.getNode());
12666       } else {
12667         assert(N0.getOpcode() == ISD::FABS);
12668         SDValue Hi =
12669             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12670                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12671                                               SDLoc(NewConv)));
12672         AddToWorklist(Hi.getNode());
12673         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12674         AddToWorklist(FlipBit.getNode());
12675       }
      // The same flip bit is applied to both halves of the pair.
12676       SDValue FlipBits =
12677           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12678       AddToWorklist(FlipBits.getNode());
12679       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12680     }
12681     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12682     if (N0.getOpcode() == ISD::FNEG)
12683       return DAG.getNode(ISD::XOR, DL, VT,
12684                          NewConv, DAG.getConstant(SignBit, DL, VT));
12685     assert(N0.getOpcode() == ISD::FABS);
12686     return DAG.getNode(ISD::AND, DL, VT,
12687                        NewConv, DAG.getConstant(~SignBit, DL, VT));
12688   }
12689
12690   // fold (bitconvert (fcopysign cst, x)) ->
12691   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
12692   // Note that we don't handle (copysign x, cst) because this can always be
12693   // folded to an fneg or fabs.
12694   //
12695   // For ppc_fp128:
12696   // fold (bitcast (fcopysign cst, x)) ->
12697   //     flipbit = (and (extract_element
12698   //                     (xor (bitcast cst), (bitcast x)), 0),
12699   //                    signbit)
12700   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
12701   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12702       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12703       VT.isInteger() && !VT.isVector()) {
12704     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12705     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12706     if (isTypeLegal(IntXVT)) {
12707       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12708       AddToWorklist(X.getNode());
12709
12710       // If X has a different width than the result/lhs, sext it or truncate it.
12711       unsigned VTWidth = VT.getSizeInBits();
12712       if (OrigXWidth < VTWidth) {
12713         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12714         AddToWorklist(X.getNode());
12715       } else if (OrigXWidth > VTWidth) {
12716         // To get the sign bit in the right place, we have to shift it right
12717         // before truncating.
12718         SDLoc DL(X);
12719         X = DAG.getNode(ISD::SRL, DL,
12720                         X.getValueType(), X,
12721                         DAG.getConstant(OrigXWidth-VTWidth, DL,
12722                                         X.getValueType()));
12723         AddToWorklist(X.getNode());
12724         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12725         AddToWorklist(X.getNode());
12726       }
12727
12728       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12729         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12730         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12731         AddToWorklist(Cst.getNode());
        // Note: this X shadows the outer X computed above; the ppcf128 path
        // works from a fresh bitcast of the sign operand to VT.
12732         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12733         AddToWorklist(X.getNode());
12734         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12735         AddToWorklist(XorResult.getNode());
12736         SDValue XorResult64 = DAG.getNode(
12737             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12738             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12739                                   SDLoc(XorResult)));
12740         AddToWorklist(XorResult64.getNode());
12741         SDValue FlipBit =
12742             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12743                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12744         AddToWorklist(FlipBit.getNode());
12745         SDValue FlipBits =
12746             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12747         AddToWorklist(FlipBits.getNode());
12748         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12749       }
      // Generic path: keep only the sign bit of X and everything but the sign
      // bit of the constant, then OR the two together.
12750       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12751       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12752                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
12753       AddToWorklist(X.getNode());
12754
12755       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12756       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12757                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12758       AddToWorklist(Cst.getNode());
12759
12760       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12761     }
12762   }
12763
12764   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12765   if (N0.getOpcode() == ISD::BUILD_PAIR)
12766     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12767       return CombineLD;
12768
12769   // Remove double bitcasts from shuffles - this is often a legacy of
12770   // XformToShuffleWithZero being used to combine bitmaskings (of
12771   // float vectors bitcast to integer vectors) into shuffles.
12772   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12773   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12774       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12775       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12776       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12777     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12778
12779     // If operands are a bitcast, peek through if it casts the original VT.
12780     // If operands are a constant, just bitcast back to original VT.
12781     auto PeekThroughBitcast = [&](SDValue Op) {
12782       if (Op.getOpcode() == ISD::BITCAST &&
12783           Op.getOperand(0).getValueType() == VT)
12784         return SDValue(Op.getOperand(0));
12785       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12786           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12787         return DAG.getBitcast(VT, Op);
12788       return SDValue();
12789     };
12790
12791     // FIXME: If either input vector is bitcast, try to convert the shuffle to
12792     // the result type of this bitcast. This would eliminate at least one
12793     // bitcast. See the transform in InstCombine.
12794     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12795     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    // Give up on the combine unless both shuffle operands could be expressed
    // in VT.
12796     if (!(SV0 && SV1))
12797       return SDValue();
12798
    // VT has MaskScale times as many elements as the shuffle's type, so each
    // source mask entry expands to MaskScale consecutive entries. Negative
    // mask entries are carried over as -1.
12799     int MaskScale =
12800         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12801     SmallVector<int, 8> NewMask;
12802     for (int M : SVN->getMask())
12803       for (int i = 0; i != MaskScale; ++i)
12804         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12805
12806     SDValue LegalShuffle =
12807         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12808     if (LegalShuffle)
12809       return LegalShuffle;
12810   }
12811
12812   return SDValue();
12813 }
12814
/// Combine a BUILD_PAIR node N. The only fold attempted is
/// CombineConsecutiveLoads, called with the pair's own result type; its result
/// (possibly an empty SDValue) is returned unchanged.
12815 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12816   EVT VT = N->getValueType(0);
12817   return CombineConsecutiveLoads(N, VT);
12818 }
12819
/// Combine a FREEZE node N. Returns the operand itself when it is already a
/// FREEZE or passes isIntOrFPConstant, which makes the outer freeze redundant.
/// Otherwise returns an empty SDValue.
12820 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12821   SDValue N0 = N->getOperand(0);
12822
12823   // (freeze (freeze x)) -> (freeze x)
12824   if (N0.getOpcode() == ISD::FREEZE)
12825     return N0;
12826
12827   // If the input is a constant, return it.
12828   if (isIntOrFPConstant(N0))
12829     return N0;
12830
12831   return SDValue();
12832 }
12833
12834 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12835 /// operands. DstEltVT indicates the destination element value type.
///
/// Returns a build_vector holding the same bits as BV but with elements of
/// type DstEltVT, or BV itself when the element type is already DstEltVT.
/// FP element types of a different size are handled by recursing through an
/// integer element type of matching width.
12836 SDValue DAGCombiner::
12837 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12838   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12839
12840   // If this is already the right type, we're done.
12841   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12842
12843   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12844   unsigned DstBitSize = DstEltVT.getSizeInBits();
12845
12846   // If this is a conversion of N elements of one type to N elements of another
12847   // type, convert each element.  This handles FP<->INT cases.
12848   if (SrcBitSize == DstBitSize) {
12849     SmallVector<SDValue, 8> Ops;
12850     for (SDValue Op : BV->op_values()) {
12851       // If the vector element type is not legal, the BUILD_VECTOR operands
12852       // are promoted and implicitly truncated.  Make that explicit here.
12853       if (Op.getValueType() != SrcEltVT)
12854         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12855       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12856       AddToWorklist(Ops.back().getNode());
12857     }
12858     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12859                               BV->getValueType(0).getVectorNumElements());
12860     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12861   }
12862
12863   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12864   // handle annoying details of growing/shrinking FP values, we convert them to
12865   // int first.
12866   if (SrcEltVT.isFloatingPoint()) {
12867     // Convert the input float vector to an int vector where the elements are
12868     // the same sizes.
12869     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12870     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12871     SrcEltVT = IntVT;
12872   }
12873
12874   // Now we know the input is an integer vector.  If the output is a FP type,
12875   // convert to integer first, then to FP of the right size.
12876   if (DstEltVT.isFloatingPoint()) {
12877     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12878     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12879
12880     // Next, convert to FP elements of the same size.
12881     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12882   }
12883
12884   SDLoc DL(BV);
12885
12886   // Okay, we know the src/dst types are both integers of differing types.
12887   // Handling growing first.
12888   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12889   if (SrcBitSize < DstBitSize) {
12890     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12891
12892     SmallVector<SDValue, 8> Ops;
    // Each iteration consumes one group of NumInputsPerOutput consecutive
    // inputs and produces one output element.
12893     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12894          i += NumInputsPerOutput) {
12895       bool isLE = DAG.getDataLayout().isLittleEndian();
12896       APInt NewBits = APInt(DstBitSize, 0);
12897       bool EltIsUndef = true;
12898       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12899         // Shift the previously computed bits over.
12900         NewBits <<= SrcBitSize;
        // The input visited first ends up in the most significant bits, so
        // on little-endian layouts the group is walked from its last input
        // to its first.
12901         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        // An undef input contributes zero bits unless the whole group is
        // undef (see EltIsUndef below).
12902         if (Op.isUndef()) continue;
12903         EltIsUndef = false;
12904
12905         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12906                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12907       }
12908
      // The output element is undef only if every input in its group was.
12909       if (EltIsUndef)
12910         Ops.push_back(DAG.getUNDEF(DstEltVT));
12911       else
12912         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12913     }
12914
12915     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12916     return DAG.getBuildVector(VT, DL, Ops);
12917   }
12918
12919   // Finally, this must be the case where we are shrinking elements: each input
12920   // turns into multiple outputs.
12921   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12922   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12923                             NumOutputsPerInput*BV->getNumOperands());
12924   SmallVector<SDValue, 8> Ops;
12925
12926   for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to NumOutputsPerInput undef outputs.
12927     if (Op.isUndef()) {
12928       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12929       continue;
12930     }
12931
12932     APInt OpVal = cast<ConstantSDNode>(Op)->
12933                   getAPIntValue().zextOrTrunc(SrcBitSize);
12934
    // Peel off DstBitSize bits at a time, least significant piece first.
12935     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12936       APInt ThisVal = OpVal.trunc(DstBitSize);
12937       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12938       OpVal.lshrInPlace(DstBitSize);
12939     }
12940
12941     // For big endian targets, swap the order of the pieces of each element.
12942     if (DAG.getDataLayout().isBigEndian())
12943       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12944   }
12945
12946   return DAG.getBuildVector(VT, DL, Ops);
12947 }
12948
12949 /// Try to perform FMA combining on a given FADD node.
12950 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12951   SDValue N0 = N->getOperand(0);
12952   SDValue N1 = N->getOperand(1);
12953   EVT VT = N->getValueType(0);
12954   SDLoc SL(N);
12955
12956   const TargetOptions &Options = DAG.getTarget().Options;
12957
12958   // Floating-point multiply-add with intermediate rounding.
12959   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12960
12961   // Floating-point multiply-add without intermediate rounding.
12962   bool HasFMA =
12963       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12964       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12965
12966   // No valid opcode, do not combine.
12967   if (!HasFMAD && !HasFMA)
12968     return SDValue();
12969
12970   bool CanReassociate =
12971       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
12972   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12973                               Options.UnsafeFPMath || HasFMAD);
12974   // If the addition is not contractable, do not combine.
12975   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
12976     return SDValue();
12977
12978   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12979     return SDValue();
12980
12981   // Always prefer FMAD to FMA for precision.
12982   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12983   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12984
12985   // Is the node an FMUL and contractable either due to global flags or
12986   // SDNodeFlags.
12987   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12988     if (N.getOpcode() != ISD::FMUL)
12989       return false;
12990     return AllowFusionGlobally || N->getFlags().hasAllowContract();
12991   };
12992   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12993   // prefer to fold the multiply with fewer uses.
12994   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12995     if (N0.getNode()->use_size() > N1.getNode()->use_size())
12996       std::swap(N0, N1);
12997   }
12998
12999   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13000   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13001     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13002                        N0.getOperand(1), N1);
13003   }
13004
13005   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13006   // Note: Commutes FADD operands.
13007   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13008     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13009                        N1.getOperand(1), N0);
13010   }
13011
13012   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13013   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13014   // This requires reassociation because it changes the order of operations.
13015   SDValue FMA, E;
13016   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
13017       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13018       N0.getOperand(2).hasOneUse()) {
13019     FMA = N0;
13020     E = N1;
13021   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
13022              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13023              N1.getOperand(2).hasOneUse()) {
13024     FMA = N1;
13025     E = N0;
13026   }
13027   if (FMA && E) {
13028     SDValue A = FMA.getOperand(0);
13029     SDValue B = FMA.getOperand(1);
13030     SDValue C = FMA.getOperand(2).getOperand(0);
13031     SDValue D = FMA.getOperand(2).getOperand(1);
13032     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13033     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13034   }
13035
13036   // Look through FP_EXTEND nodes to do more combining.
13037
13038   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13039   if (N0.getOpcode() == ISD::FP_EXTEND) {
13040     SDValue N00 = N0.getOperand(0);
13041     if (isContractableFMUL(N00) &&
13042         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13043                             N00.getValueType())) {
13044       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13045                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13046                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13047                          N1);
13048     }
13049   }
13050
13051   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13052   // Note: Commutes FADD operands.
13053   if (N1.getOpcode() == ISD::FP_EXTEND) {
13054     SDValue N10 = N1.getOperand(0);
13055     if (isContractableFMUL(N10) &&
13056         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13057                             N10.getValueType())) {
13058       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13059                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13060                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13061                          N0);
13062     }
13063   }
13064
13065   // More folding opportunities when target permits.
13066   if (Aggressive) {
13067     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13068     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
13069     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13070                                     SDValue Z) {
13071       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13072                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13073                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13074                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13075                                      Z));
13076     };
13077     if (N0.getOpcode() == PreferredFusedOpcode) {
13078       SDValue N02 = N0.getOperand(2);
13079       if (N02.getOpcode() == ISD::FP_EXTEND) {
13080         SDValue N020 = N02.getOperand(0);
13081         if (isContractableFMUL(N020) &&
13082             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13083                                 N020.getValueType())) {
13084           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13085                                       N020.getOperand(0), N020.getOperand(1),
13086                                       N1);
13087         }
13088       }
13089     }
13090
13091     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13092     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13093     // FIXME: This turns two single-precision and one double-precision
13094     // operation into two double-precision operations, which might not be
13095     // interesting for all targets, especially GPUs.
13096     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13097                                     SDValue Z) {
13098       return DAG.getNode(
13099           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13100           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13101           DAG.getNode(PreferredFusedOpcode, SL, VT,
13102                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13103                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13104     };
13105     if (N0.getOpcode() == ISD::FP_EXTEND) {
13106       SDValue N00 = N0.getOperand(0);
13107       if (N00.getOpcode() == PreferredFusedOpcode) {
13108         SDValue N002 = N00.getOperand(2);
13109         if (isContractableFMUL(N002) &&
13110             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13111                                 N00.getValueType())) {
13112           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13113                                       N002.getOperand(0), N002.getOperand(1),
13114                                       N1);
13115         }
13116       }
13117     }
13118
13119     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
13120     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13121     if (N1.getOpcode() == PreferredFusedOpcode) {
13122       SDValue N12 = N1.getOperand(2);
13123       if (N12.getOpcode() == ISD::FP_EXTEND) {
13124         SDValue N120 = N12.getOperand(0);
13125         if (isContractableFMUL(N120) &&
13126             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13127                                 N120.getValueType())) {
13128           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13129                                       N120.getOperand(0), N120.getOperand(1),
13130                                       N0);
13131         }
13132       }
13133     }
13134
13135     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
13136     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13137     // FIXME: This turns two single-precision and one double-precision
13138     // operation into two double-precision operations, which might not be
13139     // interesting for all targets, especially GPUs.
13140     if (N1.getOpcode() == ISD::FP_EXTEND) {
13141       SDValue N10 = N1.getOperand(0);
13142       if (N10.getOpcode() == PreferredFusedOpcode) {
13143         SDValue N102 = N10.getOperand(2);
13144         if (isContractableFMUL(N102) &&
13145             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13146                                 N10.getValueType())) {
13147           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13148                                       N102.getOperand(0), N102.getOperand(1),
13149                                       N0);
13150         }
13151       }
13152     }
13153   }
13154
13155   return SDValue();
13156 }
13157
13158 /// Try to perform FMA combining on a given FSUB node.
13159 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13160   SDValue N0 = N->getOperand(0);
13161   SDValue N1 = N->getOperand(1);
13162   EVT VT = N->getValueType(0);
13163   SDLoc SL(N);
13164
13165   const TargetOptions &Options = DAG.getTarget().Options;
13166   // Floating-point multiply-add with intermediate rounding.
13167   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13168
13169   // Floating-point multiply-add without intermediate rounding.
13170   bool HasFMA =
13171       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13172       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13173
13174   // No valid opcode, do not combine.
13175   if (!HasFMAD && !HasFMA)
13176     return SDValue();
13177
13178   const SDNodeFlags Flags = N->getFlags();
13179   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13180                               Options.UnsafeFPMath || HasFMAD);
13181
13182   // If the subtraction is not contractable, do not combine.
13183   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13184     return SDValue();
13185
13186   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13187     return SDValue();
13188
13189   // Always prefer FMAD to FMA for precision.
13190   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13191   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13192   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13193
13194   // Is the node an FMUL and contractable either due to global flags or
13195   // SDNodeFlags.
13196   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13197     if (N.getOpcode() != ISD::FMUL)
13198       return false;
13199     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13200   };
13201
13202   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13203   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13204     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13205       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13206                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13207     }
13208     return SDValue();
13209   };
13210
13211   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13212   // Note: Commutes FSUB operands.
13213   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13214     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13215       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13216                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13217                          YZ.getOperand(1), X);
13218     }
13219     return SDValue();
13220   };
13221
13222   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13223   // prefer to fold the multiply with fewer uses.
13224   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13225       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13226     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13227     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13228       return V;
13229     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13230     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13231       return V;
13232   } else {
13233     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13234     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13235       return V;
13236     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13237     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13238       return V;
13239   }
13240
13241   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
13242   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13243       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13244     SDValue N00 = N0.getOperand(0).getOperand(0);
13245     SDValue N01 = N0.getOperand(0).getOperand(1);
13246     return DAG.getNode(PreferredFusedOpcode, SL, VT,
13247                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13248                        DAG.getNode(ISD::FNEG, SL, VT, N1));
13249   }
13250
13251   // Look through FP_EXTEND nodes to do more combining.
13252
13253   // fold (fsub (fpext (fmul x, y)), z)
13254   //   -> (fma (fpext x), (fpext y), (fneg z))
13255   if (N0.getOpcode() == ISD::FP_EXTEND) {
13256     SDValue N00 = N0.getOperand(0);
13257     if (isContractableFMUL(N00) &&
13258         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13259                             N00.getValueType())) {
13260       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13261                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13262                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13263                          DAG.getNode(ISD::FNEG, SL, VT, N1));
13264     }
13265   }
13266
13267   // fold (fsub x, (fpext (fmul y, z)))
13268   //   -> (fma (fneg (fpext y)), (fpext z), x)
13269   // Note: Commutes FSUB operands.
13270   if (N1.getOpcode() == ISD::FP_EXTEND) {
13271     SDValue N10 = N1.getOperand(0);
13272     if (isContractableFMUL(N10) &&
13273         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13274                             N10.getValueType())) {
13275       return DAG.getNode(
13276           PreferredFusedOpcode, SL, VT,
13277           DAG.getNode(ISD::FNEG, SL, VT,
13278                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13279           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13280     }
13281   }
13282
13283   // fold (fsub (fpext (fneg (fmul, x, y))), z)
13284   //   -> (fneg (fma (fpext x), (fpext y), z))
13285   // Note: This could be removed with appropriate canonicalization of the
13286   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13287   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13288   // from implementing the canonicalization in visitFSUB.
13289   if (N0.getOpcode() == ISD::FP_EXTEND) {
13290     SDValue N00 = N0.getOperand(0);
13291     if (N00.getOpcode() == ISD::FNEG) {
13292       SDValue N000 = N00.getOperand(0);
13293       if (isContractableFMUL(N000) &&
13294           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13295                               N00.getValueType())) {
13296         return DAG.getNode(
13297             ISD::FNEG, SL, VT,
13298             DAG.getNode(PreferredFusedOpcode, SL, VT,
13299                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13300                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13301                         N1));
13302       }
13303     }
13304   }
13305
13306   // fold (fsub (fneg (fpext (fmul, x, y))), z)
13307   //   -> (fneg (fma (fpext x)), (fpext y), z)
13308   // Note: This could be removed with appropriate canonicalization of the
13309   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
13310   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13311   // from implementing the canonicalization in visitFSUB.
13312   if (N0.getOpcode() == ISD::FNEG) {
13313     SDValue N00 = N0.getOperand(0);
13314     if (N00.getOpcode() == ISD::FP_EXTEND) {
13315       SDValue N000 = N00.getOperand(0);
13316       if (isContractableFMUL(N000) &&
13317           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13318                               N000.getValueType())) {
13319         return DAG.getNode(
13320             ISD::FNEG, SL, VT,
13321             DAG.getNode(PreferredFusedOpcode, SL, VT,
13322                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13323                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13324                         N1));
13325       }
13326     }
13327   }
13328
13329   auto isReassociable = [Options](SDNode *N) {
13330     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13331   };
13332
13333   auto isContractableAndReassociableFMUL = [isContractableFMUL,
13334                                             isReassociable](SDValue N) {
13335     return isContractableFMUL(N) && isReassociable(N.getNode());
13336   };
13337
13338   // More folding opportunities when target permits.
13339   if (Aggressive && isReassociable(N)) {
13340     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13341     // fold (fsub (fma x, y, (fmul u, v)), z)
13342     //   -> (fma x, y (fma u, v, (fneg z)))
13343     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13344         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13345         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13346       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13347                          N0.getOperand(1),
13348                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13349                                      N0.getOperand(2).getOperand(0),
13350                                      N0.getOperand(2).getOperand(1),
13351                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
13352     }
13353
13354     // fold (fsub x, (fma y, z, (fmul u, v)))
13355     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
13356     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13357         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13358         N1->hasOneUse() && NoSignedZero) {
13359       SDValue N20 = N1.getOperand(2).getOperand(0);
13360       SDValue N21 = N1.getOperand(2).getOperand(1);
13361       return DAG.getNode(
13362           PreferredFusedOpcode, SL, VT,
13363           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13364           DAG.getNode(PreferredFusedOpcode, SL, VT,
13365                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13366     }
13367
13368     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13369     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
13370     if (N0.getOpcode() == PreferredFusedOpcode &&
13371         N0->hasOneUse()) {
13372       SDValue N02 = N0.getOperand(2);
13373       if (N02.getOpcode() == ISD::FP_EXTEND) {
13374         SDValue N020 = N02.getOperand(0);
13375         if (isContractableAndReassociableFMUL(N020) &&
13376             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13377                                 N020.getValueType())) {
13378           return DAG.getNode(
13379               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13380               DAG.getNode(
13381                   PreferredFusedOpcode, SL, VT,
13382                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13383                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13384                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13385         }
13386       }
13387     }
13388
13389     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13390     //   -> (fma (fpext x), (fpext y),
13391     //           (fma (fpext u), (fpext v), (fneg z)))
13392     // FIXME: This turns two single-precision and one double-precision
13393     // operation into two double-precision operations, which might not be
13394     // interesting for all targets, especially GPUs.
13395     if (N0.getOpcode() == ISD::FP_EXTEND) {
13396       SDValue N00 = N0.getOperand(0);
13397       if (N00.getOpcode() == PreferredFusedOpcode) {
13398         SDValue N002 = N00.getOperand(2);
13399         if (isContractableAndReassociableFMUL(N002) &&
13400             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13401                                 N00.getValueType())) {
13402           return DAG.getNode(
13403               PreferredFusedOpcode, SL, VT,
13404               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13405               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13406               DAG.getNode(
13407                   PreferredFusedOpcode, SL, VT,
13408                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13409                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13410                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13411         }
13412       }
13413     }
13414
13415     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13416     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13417     if (N1.getOpcode() == PreferredFusedOpcode &&
13418         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13419         N1->hasOneUse()) {
13420       SDValue N120 = N1.getOperand(2).getOperand(0);
13421       if (isContractableAndReassociableFMUL(N120) &&
13422           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13423                               N120.getValueType())) {
13424         SDValue N1200 = N120.getOperand(0);
13425         SDValue N1201 = N120.getOperand(1);
13426         return DAG.getNode(
13427             PreferredFusedOpcode, SL, VT,
13428             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13429             DAG.getNode(PreferredFusedOpcode, SL, VT,
13430                         DAG.getNode(ISD::FNEG, SL, VT,
13431                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13432                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13433       }
13434     }
13435
13436     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13437     //   -> (fma (fneg (fpext y)), (fpext z),
13438     //           (fma (fneg (fpext u)), (fpext v), x))
13439     // FIXME: This turns two single-precision and one double-precision
13440     // operation into two double-precision operations, which might not be
13441     // interesting for all targets, especially GPUs.
13442     if (N1.getOpcode() == ISD::FP_EXTEND &&
13443         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13444       SDValue CvtSrc = N1.getOperand(0);
13445       SDValue N100 = CvtSrc.getOperand(0);
13446       SDValue N101 = CvtSrc.getOperand(1);
13447       SDValue N102 = CvtSrc.getOperand(2);
13448       if (isContractableAndReassociableFMUL(N102) &&
13449           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13450                               CvtSrc.getValueType())) {
13451         SDValue N1020 = N102.getOperand(0);
13452         SDValue N1021 = N102.getOperand(1);
13453         return DAG.getNode(
13454             PreferredFusedOpcode, SL, VT,
13455             DAG.getNode(ISD::FNEG, SL, VT,
13456                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13457             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13458             DAG.getNode(PreferredFusedOpcode, SL, VT,
13459                         DAG.getNode(ISD::FNEG, SL, VT,
13460                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13461                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13462       }
13463     }
13464   }
13465
13466   return SDValue();
13467 }
13468
13469 /// Try to perform FMA combining on a given FMUL node based on the distributive
13470 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13471 /// subtraction instead of addition).
13472 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13473   SDValue N0 = N->getOperand(0);
13474   SDValue N1 = N->getOperand(1);
13475   EVT VT = N->getValueType(0);
13476   SDLoc SL(N);
13477
13478   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13479
13480   const TargetOptions &Options = DAG.getTarget().Options;
13481
13482   // The transforms below are incorrect when x == 0 and y == inf, because the
13483   // intermediate multiplication produces a nan.
13484   if (!Options.NoInfsFPMath)
13485     return SDValue();
13486
13487   // Floating-point multiply-add without intermediate rounding.
13488   bool HasFMA =
13489       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13490       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13491       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13492
13493   // Floating-point multiply-add with intermediate rounding. This can result
13494   // in a less precise result due to the changed rounding order.
13495   bool HasFMAD = Options.UnsafeFPMath &&
13496                  (LegalOperations && TLI.isFMADLegal(DAG, N));
13497
13498   // No valid opcode, do not combine.
13499   if (!HasFMAD && !HasFMA)
13500     return SDValue();
13501
13502   // Always prefer FMAD to FMA for precision.
13503   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13504   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13505
13506   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13507   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13508   auto FuseFADD = [&](SDValue X, SDValue Y) {
13509     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13510       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13511         if (C->isExactlyValue(+1.0))
13512           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13513                              Y);
13514         if (C->isExactlyValue(-1.0))
13515           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13516                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13517       }
13518     }
13519     return SDValue();
13520   };
13521
13522   if (SDValue FMA = FuseFADD(N0, N1))
13523     return FMA;
13524   if (SDValue FMA = FuseFADD(N1, N0))
13525     return FMA;
13526
13527   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13528   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13529   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13530   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13531   auto FuseFSUB = [&](SDValue X, SDValue Y) {
13532     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13533       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13534         if (C0->isExactlyValue(+1.0))
13535           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13536                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13537                              Y);
13538         if (C0->isExactlyValue(-1.0))
13539           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13540                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13541                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13542       }
13543       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13544         if (C1->isExactlyValue(+1.0))
13545           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13546                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13547         if (C1->isExactlyValue(-1.0))
13548           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13549                              Y);
13550       }
13551     }
13552     return SDValue();
13553   };
13554
13555   if (SDValue FMA = FuseFSUB(N0, N1))
13556     return FMA;
13557   if (SDValue FMA = FuseFSUB(N1, N0))
13558     return FMA;
13559
13560   return SDValue();
13561 }
13562
13563 SDValue DAGCombiner::visitFADD(SDNode *N) {
13564   SDValue N0 = N->getOperand(0);
13565   SDValue N1 = N->getOperand(1);
13566   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13567   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13568   EVT VT = N->getValueType(0);
13569   SDLoc DL(N);
13570   const TargetOptions &Options = DAG.getTarget().Options;
13571   SDNodeFlags Flags = N->getFlags();
13572   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13573
13574   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13575     return R;
13576
13577   // fold vector ops
13578   if (VT.isVector())
13579     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13580       return FoldedVOp;
13581
13582   // fold (fadd c1, c2) -> c1 + c2
13583   if (N0CFP && N1CFP)
13584     return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13585
13586   // canonicalize constant to RHS
13587   if (N0CFP && !N1CFP)
13588     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13589
13590   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13591   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13592   if (N1C && N1C->isZero())
13593     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13594       return N0;
13595
13596   if (SDValue NewSel = foldBinOpIntoSelect(N))
13597     return NewSel;
13598
13599   // fold (fadd A, (fneg B)) -> (fsub A, B)
13600   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13601     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13602             N1, DAG, LegalOperations, ForCodeSize))
13603       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13604
13605   // fold (fadd (fneg A), B) -> (fsub B, A)
13606   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13607     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13608             N0, DAG, LegalOperations, ForCodeSize))
13609       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13610
13611   auto isFMulNegTwo = [](SDValue FMul) {
13612     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13613       return false;
13614     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13615     return C && C->isExactlyValue(-2.0);
13616   };
13617
13618   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13619   if (isFMulNegTwo(N0)) {
13620     SDValue B = N0.getOperand(0);
13621     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13622     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13623   }
13624   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13625   if (isFMulNegTwo(N1)) {
13626     SDValue B = N1.getOperand(0);
13627     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13628     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13629   }
13630
13631   // No FP constant should be created after legalization as Instruction
13632   // Selection pass has a hard time dealing with FP constants.
13633   bool AllowNewConst = (Level < AfterLegalizeDAG);
13634
13635   // If nnan is enabled, fold lots of things.
13636   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13637     // If allowed, fold (fadd (fneg x), x) -> 0.0
13638     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13639       return DAG.getConstantFP(0.0, DL, VT);
13640
13641     // If allowed, fold (fadd x, (fneg x)) -> 0.0
13642     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13643       return DAG.getConstantFP(0.0, DL, VT);
13644   }
13645
13646   // If 'unsafe math' or reassoc and nsz, fold lots of things.
13647   // TODO: break out portions of the transformations below for which Unsafe is
13648   //       considered and which do not require both nsz and reassoc
13649   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13650        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13651       AllowNewConst) {
13652     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13653     if (N1CFP && N0.getOpcode() == ISD::FADD &&
13654         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13655       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13656       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13657     }
13658
13659     // We can fold chains of FADD's of the same value into multiplications.
13660     // This transform is not safe in general because we are reducing the number
13661     // of rounding steps.
13662     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13663       if (N0.getOpcode() == ISD::FMUL) {
13664         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13665         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13666
13667         // (fadd (fmul x, c), x) -> (fmul x, c+1)
13668         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13669           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13670                                        DAG.getConstantFP(1.0, DL, VT));
13671           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13672         }
13673
13674         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13675         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13676             N1.getOperand(0) == N1.getOperand(1) &&
13677             N0.getOperand(0) == N1.getOperand(0)) {
13678           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13679                                        DAG.getConstantFP(2.0, DL, VT));
13680           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13681         }
13682       }
13683
13684       if (N1.getOpcode() == ISD::FMUL) {
13685         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13686         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13687
13688         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13689         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13690           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13691                                        DAG.getConstantFP(1.0, DL, VT));
13692           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13693         }
13694
13695         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13696         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13697             N0.getOperand(0) == N0.getOperand(1) &&
13698             N1.getOperand(0) == N0.getOperand(0)) {
13699           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13700                                        DAG.getConstantFP(2.0, DL, VT));
13701           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13702         }
13703       }
13704
13705       if (N0.getOpcode() == ISD::FADD) {
13706         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13707         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13708         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13709             (N0.getOperand(0) == N1)) {
13710           return DAG.getNode(ISD::FMUL, DL, VT, N1,
13711                              DAG.getConstantFP(3.0, DL, VT));
13712         }
13713       }
13714
13715       if (N1.getOpcode() == ISD::FADD) {
13716         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13717         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13718         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13719             N1.getOperand(0) == N0) {
13720           return DAG.getNode(ISD::FMUL, DL, VT, N0,
13721                              DAG.getConstantFP(3.0, DL, VT));
13722         }
13723       }
13724
13725       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13726       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13727           N0.getOperand(0) == N0.getOperand(1) &&
13728           N1.getOperand(0) == N1.getOperand(1) &&
13729           N0.getOperand(0) == N1.getOperand(0)) {
13730         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13731                            DAG.getConstantFP(4.0, DL, VT));
13732       }
13733     }
13734   } // enable-unsafe-fp-math
13735
13736   // FADD -> FMA combines:
13737   if (SDValue Fused = visitFADDForFMACombine(N)) {
13738     AddToWorklist(Fused.getNode());
13739     return Fused;
13740   }
13741   return SDValue();
13742 }
13743
13744 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13745   SDValue Chain = N->getOperand(0);
13746   SDValue N0 = N->getOperand(1);
13747   SDValue N1 = N->getOperand(2);
13748   EVT VT = N->getValueType(0);
13749   EVT ChainVT = N->getValueType(1);
13750   SDLoc DL(N);
13751   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13752
13753   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13754   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13755     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13756             N1, DAG, LegalOperations, ForCodeSize)) {
13757       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13758                          {Chain, N0, NegN1});
13759     }
13760
13761   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13762   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13763     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13764             N0, DAG, LegalOperations, ForCodeSize)) {
13765       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13766                          {Chain, N1, NegN0});
13767     }
13768   return SDValue();
13769 }
13770
13771 SDValue DAGCombiner::visitFSUB(SDNode *N) {
13772   SDValue N0 = N->getOperand(0);
13773   SDValue N1 = N->getOperand(1);
13774   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13775   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13776   EVT VT = N->getValueType(0);
13777   SDLoc DL(N);
13778   const TargetOptions &Options = DAG.getTarget().Options;
13779   const SDNodeFlags Flags = N->getFlags();
13780   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13781
13782   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13783     return R;
13784
13785   // fold vector ops
13786   if (VT.isVector())
13787     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13788       return FoldedVOp;
13789
13790   // fold (fsub c1, c2) -> c1-c2
13791   if (N0CFP && N1CFP)
13792     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13793
13794   if (SDValue NewSel = foldBinOpIntoSelect(N))
13795     return NewSel;
13796
13797   // (fsub A, 0) -> A
13798   if (N1CFP && N1CFP->isZero()) {
13799     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13800         Flags.hasNoSignedZeros()) {
13801       return N0;
13802     }
13803   }
13804
13805   if (N0 == N1) {
13806     // (fsub x, x) -> 0.0
13807     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13808       return DAG.getConstantFP(0.0f, DL, VT);
13809   }
13810
13811   // (fsub -0.0, N1) -> -N1
13812   if (N0CFP && N0CFP->isZero()) {
13813     if (N0CFP->isNegative() ||
13814         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13815       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13816       // flushed to zero, unless all users treat denorms as zero (DAZ).
13817       // FIXME: This transform will change the sign of a NaN and the behavior
13818       // of a signaling NaN. It is only valid when a NoNaN flag is present.
13819       DenormalMode DenormMode = DAG.getDenormalMode(VT);
13820       if (DenormMode == DenormalMode::getIEEE()) {
13821         if (SDValue NegN1 =
13822                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13823           return NegN1;
13824         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13825           return DAG.getNode(ISD::FNEG, DL, VT, N1);
13826       }
13827     }
13828   }
13829
13830   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13831        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13832       N1.getOpcode() == ISD::FADD) {
13833     // X - (X + Y) -> -Y
13834     if (N0 == N1->getOperand(0))
13835       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13836     // X - (Y + X) -> -Y
13837     if (N0 == N1->getOperand(1))
13838       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13839   }
13840
13841   // fold (fsub A, (fneg B)) -> (fadd A, B)
13842   if (SDValue NegN1 =
13843           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13844     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13845
13846   // FSUB -> FMA combines:
13847   if (SDValue Fused = visitFSUBForFMACombine(N)) {
13848     AddToWorklist(Fused.getNode());
13849     return Fused;
13850   }
13851
13852   return SDValue();
13853 }
13854
/// Try to simplify an FMUL node.
///
/// The folds are attempted in the order written below and the first one that
/// succeeds supplies the return value. A null SDValue is returned when no fold
/// applies.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  const SDNodeFlags Flags = N->getFlags();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);

  // canonicalize constant to RHS
  if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
     !DAG.isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // The folds in this block regroup multiplications, so they are only tried
  // under global unsafe-fp-math or when this node allows reassociation.
  if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
    // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
    if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
        N0.getOpcode() == ISD::FMUL) {
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      // Avoid an infinite loop by making sure that N00 is not a constant
      // (the inner multiply has not been constant folded yet).
      if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
          !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
        SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
        return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
      }
    }

    // Match a special-case: we convert X * 2.0 into fadd.
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
    // NOTE(review): nothing here checks that N1 is actually a constant, so
    // the rewrite is applied for any right-hand operand.
    if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
        N0.getOperand(0) == N0.getOperand(1)) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // -N0 * -N1 --> N0 * N1
  // Both negated forms are requested up front; the fold is only taken when
  // both exist and at least one of them is reported as Cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);

  // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
  // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
  if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
      (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
      TLI.isOperationLegal(ISD::FABS, VT)) {
    // Arrange for Select to name the SELECT operand and X the other one.
    SDValue Select = N0, X = N1;
    if (Select.getOpcode() != ISD::SELECT)
      std::swap(Select, X);

    SDValue Cond = Select.getOperand(0);
    auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
    auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));

    // Require constant select arms and a condition of the form
    // (setcc X, 0.0, CC).
    if (TrueOpnd && FalseOpnd &&
        Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
        isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
        cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
      switch (CC) {
      default: break;
      case ISD::SETOLT:
      case ISD::SETULT:
      case ISD::SETOLE:
      case ISD::SETULE:
      case ISD::SETLT:
      case ISD::SETLE:
        // A less-than style compare is handled as the greater-than case with
        // the two select arms exchanged.
        std::swap(TrueOpnd, FalseOpnd);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETUGT:
      case ISD::SETOGE:
      case ISD::SETUGE:
      case ISD::SETGT:
      case ISD::SETGE:
        if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
            TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                   DAG.getNode(ISD::FABS, DL, VT, X));
        if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
          return DAG.getNode(ISD::FABS, DL, VT, X);

        break;
      }
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
13990
13991 SDValue DAGCombiner::visitFMA(SDNode *N) {
13992   SDValue N0 = N->getOperand(0);
13993   SDValue N1 = N->getOperand(1);
13994   SDValue N2 = N->getOperand(2);
13995   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13996   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13997   EVT VT = N->getValueType(0);
13998   SDLoc DL(N);
13999   const TargetOptions &Options = DAG.getTarget().Options;
14000   // FMA nodes have flags that propagate to the created nodes.
14001   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14002
14003   bool UnsafeFPMath =
14004       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14005
14006   // Constant fold FMA.
14007   if (isa<ConstantFPSDNode>(N0) &&
14008       isa<ConstantFPSDNode>(N1) &&
14009       isa<ConstantFPSDNode>(N2)) {
14010     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14011   }
14012
14013   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14014   TargetLowering::NegatibleCost CostN0 =
14015       TargetLowering::NegatibleCost::Expensive;
14016   TargetLowering::NegatibleCost CostN1 =
14017       TargetLowering::NegatibleCost::Expensive;
14018   SDValue NegN0 =
14019       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14020   SDValue NegN1 =
14021       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14022   if (NegN0 && NegN1 &&
14023       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14024        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14025     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14026
14027   if (UnsafeFPMath) {
14028     if (N0CFP && N0CFP->isZero())
14029       return N2;
14030     if (N1CFP && N1CFP->isZero())
14031       return N2;
14032   }
14033
14034   if (N0CFP && N0CFP->isExactlyValue(1.0))
14035     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14036   if (N1CFP && N1CFP->isExactlyValue(1.0))
14037     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14038
14039   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14040   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14041      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14042     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14043
14044   if (UnsafeFPMath) {
14045     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14046     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14047         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14048         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14049       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14050                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14051     }
14052
14053     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14054     if (N0.getOpcode() == ISD::FMUL &&
14055         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14056         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14057       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14058                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14059                          N2);
14060     }
14061   }
14062
14063   // (fma x, -1, y) -> (fadd (fneg x), y)
14064   if (N1CFP) {
14065     if (N1CFP->isExactlyValue(1.0))
14066       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14067
14068     if (N1CFP->isExactlyValue(-1.0) &&
14069         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14070       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14071       AddToWorklist(RHSNeg.getNode());
14072       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14073     }
14074
14075     // fma (fneg x), K, y -> fma x -K, y
14076     if (N0.getOpcode() == ISD::FNEG &&
14077         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14078          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14079                                               ForCodeSize)))) {
14080       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14081                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14082     }
14083   }
14084
14085   if (UnsafeFPMath) {
14086     // (fma x, c, x) -> (fmul x, (c+1))
14087     if (N1CFP && N0 == N2) {
14088       return DAG.getNode(
14089           ISD::FMUL, DL, VT, N0,
14090           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14091     }
14092
14093     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14094     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14095       return DAG.getNode(
14096           ISD::FMUL, DL, VT, N0,
14097           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14098     }
14099   }
14100
14101   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14102   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14103   if (!TLI.isFNegFree(VT))
14104     if (SDValue Neg = TLI.getCheaperNegatedExpression(
14105             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14106       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14107   return SDValue();
14108 }
14109
14110 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14111 // reciprocal.
14112 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14113 // Notice that this is not always beneficial. One reason is different targets
14114 // may have different costs for FDIV and FMUL, so sometimes the cost of two
14115 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14116 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14117 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14118   // TODO: Limit this transform based on optsize/minsize - it always creates at
14119   //       least 1 extra instruction. But the perf win may be substantial enough
14120   //       that only minsize should restrict this.
14121   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14122   const SDNodeFlags Flags = N->getFlags();
14123   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14124     return SDValue();
14125
14126   // Skip if current node is a reciprocal/fneg-reciprocal.
14127   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14128   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14129   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14130     return SDValue();
14131
14132   // Exit early if the target does not want this transform or if there can't
14133   // possibly be enough uses of the divisor to make the transform worthwhile.
14134   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14135
14136   // For splat vectors, scale the number of uses by the splat factor. If we can
14137   // convert the division into a scalar op, that will likely be much faster.
14138   unsigned NumElts = 1;
14139   EVT VT = N->getValueType(0);
14140   if (VT.isVector() && DAG.isSplatValue(N1))
14141     NumElts = VT.getVectorNumElements();
14142
14143   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14144     return SDValue();
14145
14146   // Find all FDIV users of the same divisor.
14147   // Use a set because duplicates may be present in the user list.
14148   SetVector<SDNode *> Users;
14149   for (auto *U : N1->uses()) {
14150     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14151       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14152       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14153           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14154           U->getFlags().hasAllowReassociation() &&
14155           U->getFlags().hasNoSignedZeros())
14156         continue;
14157
14158       // This division is eligible for optimization only if global unsafe math
14159       // is enabled or if this division allows reciprocal formation.
14160       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14161         Users.insert(U);
14162     }
14163   }
14164
14165   // Now that we have the actual number of divisor uses, make sure it meets
14166   // the minimum threshold specified by the target.
14167   if ((Users.size() * NumElts) < MinUses)
14168     return SDValue();
14169
14170   SDLoc DL(N);
14171   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14172   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14173
14174   // Dividend / Divisor -> Dividend * Reciprocal
14175   for (auto *U : Users) {
14176     SDValue Dividend = U->getOperand(0);
14177     if (Dividend != FPOne) {
14178       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14179                                     Reciprocal, Flags);
14180       CombineTo(U, NewNode);
14181     } else if (U != Reciprocal.getNode()) {
14182       // In the absence of fast-math-flags, this user node is always the
14183       // same node as Reciprocal, but with FMF they may be different nodes.
14184       CombineTo(U, Reciprocal);
14185     }
14186   }
14187   return SDValue(N, 0);  // N was replaced.
14188 }
14189
/// Try to simplify an FDIV node.
///
/// The folds are attempted in the order written below and the first one that
/// succeeds supplies the return value. A null SDValue is returned when no fold
/// applies. The reciprocal and rsqrt-estimate rewrites are only tried under
/// global unsafe-fp-math or when the node allows reciprocal formation.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags Flags = N->getFlags();
  SelectionDAG::FlagInserter FlagsInserter(DAG, N);

  if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
    return R;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Share one reciprocal between several divisions by the same divisor.
  if (SDValue V = combineRepeatedFPDivisors(N))
    return V;

  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT));
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // X / sqrt(Z) -> X * rsqrt(Z)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fp_extend(sqrt(Z)) -> X * fp_extend(rsqrt(Z))
      if (SDValue RV =
              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // X / fp_round(sqrt(Z)) -> X * fp_round(rsqrt(Z)); the second operand
      // of the original FP_ROUND is reused on the new one.
      if (SDValue RV =
              buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      // Sqrt stays null unless one of the FMUL operands is an FSQRT; Y is the
      // other FMUL operand.
      SDValue Sqrt, Y;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(0);
        Y = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        Sqrt = N1.getOperand(1);
        Y = N1.getOperand(0);
      }
      if (Sqrt.getNode()) {
        // If the other multiply operand is known positive, pull it into the
        // sqrt. That will eliminate the division if we convert to an estimate.
        if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
            N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
          // A stays null unless Y is a single-use fabs or Y is the sqrt's own
          // operand.
          SDValue A;
          if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
            A = Y.getOperand(0);
          else if (Y == Sqrt.getOperand(0))
            A = Y;
          if (A) {
            // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
            // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
            SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
            SDValue AAZ =
                DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
            if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
              return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);

            // Estimate creation failed. Clean up speculatively created nodes.
            recursivelyDeleteUnusedNodes(AAZ.getNode());
          }
        }

        // We found a FSQRT, so try to make this fold:
        // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
        if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
          SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
          AddToWorklist(Div.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (Options.NoInfsFPMath || Flags.hasNoInfs())
      if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
        return RV;
  }

  // Fold X/Sqrt(X) -> Sqrt(X)
  // Requires signed zeros to be ignorable and reassociation to be allowed,
  // each either globally or through the node flags.
  if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
      (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
    if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
      return N1;

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // Both negated forms are requested up front; the fold is only taken when
  // both exist and at least one of them is reported as Cheaper.
  TargetLowering::NegatibleCost CostN0 =
      TargetLowering::NegatibleCost::Expensive;
  TargetLowering::NegatibleCost CostN1 =
      TargetLowering::NegatibleCost::Expensive;
  SDValue NegN0 =
      TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
  SDValue NegN1 =
      TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
  if (NegN0 && NegN1 &&
      (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
       CostN1 == TargetLowering::NegatibleCost::Cheaper))
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);

  return SDValue();
}
14333
14334 SDValue DAGCombiner::visitFREM(SDNode *N) {
14335   SDValue N0 = N->getOperand(0);
14336   SDValue N1 = N->getOperand(1);
14337   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14338   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14339   EVT VT = N->getValueType(0);
14340   SDNodeFlags Flags = N->getFlags();
14341   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14342
14343   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14344     return R;
14345
14346   // fold (frem c1, c2) -> fmod(c1,c2)
14347   if (N0CFP && N1CFP)
14348     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14349
14350   if (SDValue NewSel = foldBinOpIntoSelect(N))
14351     return NewSel;
14352
14353   return SDValue();
14354 }
14355
14356 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14357   SDNodeFlags Flags = N->getFlags();
14358   const TargetOptions &Options = DAG.getTarget().Options;
14359
14360   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14361   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14362   if (!Flags.hasApproximateFuncs() ||
14363       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14364     return SDValue();
14365
14366   SDValue N0 = N->getOperand(0);
14367   if (TLI.isFsqrtCheap(N0, DAG))
14368     return SDValue();
14369
14370   // FSQRT nodes have flags that propagate to the created nodes.
14371   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14372   //       transform the fdiv, we may produce a sub-optimal estimate sequence
14373   //       because the reciprocal calculation may not have to filter out a
14374   //       0.0 input.
14375   return buildSqrtEstimate(N0, Flags);
14376 }
14377
14378 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14379 /// copysign(x, fp_round(y)) -> copysign(x, y)
14380 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14381   SDValue N1 = N->getOperand(1);
14382   if ((N1.getOpcode() == ISD::FP_EXTEND ||
14383        N1.getOpcode() == ISD::FP_ROUND)) {
14384     EVT N1VT = N1->getValueType(0);
14385     EVT N1Op0VT = N1->getOperand(0).getValueType();
14386
14387     // Always fold no-op FP casts.
14388     if (N1VT == N1Op0VT)
14389       return true;
14390
14391     // Do not optimize out type conversion of f128 type yet.
14392     // For some targets like x86_64, configuration is changed to keep one f128
14393     // value in one SSE register, but instruction selection cannot handle
14394     // FCOPYSIGN on SSE registers yet.
14395     if (N1Op0VT == MVT::f128)
14396       return false;
14397
14398     // Avoid mismatched vector operand types, for better instruction selection.
14399     if (N1Op0VT.isVector())
14400       return false;
14401
14402     return true;
14403   }
14404   return false;
14405 }
14406
14407 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14408   SDValue N0 = N->getOperand(0);
14409   SDValue N1 = N->getOperand(1);
14410   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14411   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14412   EVT VT = N->getValueType(0);
14413
14414   if (N0CFP && N1CFP) // Constant fold
14415     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14416
14417   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14418     const APFloat &V = N1C->getValueAPF();
14419     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
14420     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14421     if (!V.isNegative()) {
14422       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14423         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14424     } else {
14425       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14426         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14427                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14428     }
14429   }
14430
14431   // copysign(fabs(x), y) -> copysign(x, y)
14432   // copysign(fneg(x), y) -> copysign(x, y)
14433   // copysign(copysign(x,z), y) -> copysign(x, y)
14434   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14435       N0.getOpcode() == ISD::FCOPYSIGN)
14436     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14437
14438   // copysign(x, abs(y)) -> abs(x)
14439   if (N1.getOpcode() == ISD::FABS)
14440     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14441
14442   // copysign(x, copysign(y,z)) -> copysign(x, z)
14443   if (N1.getOpcode() == ISD::FCOPYSIGN)
14444     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14445
14446   // copysign(x, fp_extend(y)) -> copysign(x, y)
14447   // copysign(x, fp_round(y)) -> copysign(x, y)
14448   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14449     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14450
14451   return SDValue();
14452 }
14453
/// Combine an FPOW node whose exponent (operand 1) is a floating-point
/// constant or constant splat:
///   pow(X, 1/3)  --> FCBRT X                     (f32/f64 only)
///   pow(X, 0.25) --> sqrt(sqrt(X))
///   pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
/// Each fold is gated on the node's fast-math flags and on target support, as
/// checked below. Returns an empty SDValue when nothing applies.
14454 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14455   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14456   if (!ExponentC)
14457     return SDValue();
14458   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14459
14460   // Try to convert x ** (1/3) into cube root.
14461   // TODO: Handle the various flavors of long double.
14462   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14463   //       Some range near 1/3 should be fine.
14464   EVT VT = N->getValueType(0);
14465   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14466       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14467     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14468     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14469     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
14470     // For regular numbers, rounding may cause the results to differ.
14471     // Therefore, we require { nsz ninf nnan afn } for this transform.
14472     // TODO: We could select out the special cases if we don't have nsz/ninf.
14473     SDNodeFlags Flags = N->getFlags();
14474     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14475         !Flags.hasApproximateFuncs())
14476       return SDValue();
14477
14478     // Do not create a cbrt() libcall if the target does not have it, and do not
14479     // turn a pow that has lowering support into a cbrt() libcall.
14480     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14481         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14482          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14483       return SDValue();
14484
14485     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14486   }
14487
14488   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14489   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14490   // TODO: This could be extended (using a target hook) to handle smaller
14491   // power-of-2 fractional exponents.
14492   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14493   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14494   if (ExponentIs025 || ExponentIs075) {
14495     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14496     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
14497     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14498     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
14499     // For regular numbers, rounding may cause the results to differ.
14500     // Therefore, we require { ninf afn } for this transform, plus nsz for 0.25.
14501     // TODO: We could select out the special cases if we don't have nsz/ninf.
14502     SDNodeFlags Flags = N->getFlags();
14503
14504     // We only need no signed zeros for the 0.25 case.
14505     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14506         !Flags.hasApproximateFuncs())
14507       return SDValue();
14508
14509     // Don't double the number of libcalls. We are trying to inline fast code.
14510     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14511       return SDValue();
14512
14513     // Assume that libcalls are the smallest code.
14514     // TODO: This restriction should probably be lifted for vectors.
14515     if (ForCodeSize)
14516       return SDValue();
14517
14518     // pow(X, 0.25) --> sqrt(sqrt(X))
14519     SDLoc DL(N);
14520     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14521     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14522     if (ExponentIs025)
14523       return SqrtSqrt;
14524     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
          // The inner sqrt(X) node is shared by both factors.
14525     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14526   }
14527
14528   return SDValue();
14529 }
14530
/// Fold an int-to-FP conversion of a matching FP-to-int conversion into FTRUNC:
///   sitofp (fptosi X) --> ftrunc X
///   uitofp (fptoui X) --> ftrunc X
/// Only fires when X already has the result type of \p N. Returns an empty
/// SDValue when the fold is disabled or the pattern does not match.
14531 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14532                                const TargetLowering &TLI) {
14533   // This optimization is guarded by a function attribute because it may produce
14534   // unexpected results. I.e., programs may be relying on the platform-specific
14535   // undefined behavior when the float-to-int conversion overflows.
14536   const Function &F = DAG.getMachineFunction().getFunction();
14537   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14538   if (StrictOverflow.getValueAsString().equals("false"))
14539     return SDValue();
14540
14541   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14542   // replacing casts with a libcall. We also must be allowed to ignore -0.0
14543   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14544   // conversions would return +0.0.
14545   // FIXME: We should be able to use node-level FMF here.
14546   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14547   EVT VT = N->getValueType(0);
14548   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14549       !DAG.getTarget().Options.NoSignedZerosFPMath)
14550     return SDValue();
14551
14552   // fptosi/fptoui round towards zero, so converting from FP to integer and
14553   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
        // The signedness of the two conversions must agree, and the innermost
        // operand must already be of type VT.
14554   SDValue N0 = N->getOperand(0);
14555   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14556       N0.getOperand(0).getValueType() == VT)
14557     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14558
14559   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14560       N0.getOperand(0).getValueType() == VT)
14561     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14562
14563   return SDValue();
14564 }
14565
/// Combine a SINT_TO_FP node. Handles, in order: an undef operand, a constant
/// operand, switching to UINT_TO_FP when the sign bit is known zero, setcc
/// operands rewritten as a select of FP constants, and the fptosi round-trip
/// handled by foldFPToIntToFP. Returns an empty SDValue when no fold applies.
14566 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14567   SDValue N0 = N->getOperand(0);
14568   EVT VT = N->getValueType(0);
14569   EVT OpVT = N0.getValueType();
14570
14571   // [us]itofp(undef) = 0, because the result value is bounded.
14572   if (N0.isUndef())
14573     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14574
14575   // fold (sint_to_fp c1) -> c1fp
        // NOTE(review): this re-requests the same opcode on a constant operand,
        // so the folding itself presumably happens inside DAG.getNode - confirm.
14576   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14577       // ...but only if the target supports immediate floating-point values
14578       (!LegalOperations ||
14579        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14580     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14581
14582   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14583   // but UINT_TO_FP is legal on this target, try to convert.
14584   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14585       hasOperation(ISD::UINT_TO_FP, OpVT)) {
14586     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14587     if (DAG.SignBitIsZero(N0))
14588       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14589   }
14590
14591   // The next optimizations are desirable only if SELECT_CC can be lowered.
14592   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
        // A true i1 read as a signed integer is -1, hence the -1.0 arm.
14593   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14594       !VT.isVector() &&
14595       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14596     SDLoc DL(N);
14597     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14598                          DAG.getConstantFP(0.0, DL, VT));
14599   }
14600
14601   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14602   //      (select (setcc x, y, cc), 1.0, 0.0)
14603   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14604       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14605       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14606     SDLoc DL(N);
14607     return DAG.getSelect(DL, VT, N0.getOperand(0),
14608                          DAG.getConstantFP(1.0, DL, VT),
14609                          DAG.getConstantFP(0.0, DL, VT));
14610   }
14611
14612   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14613     return FTrunc;
14614
14615   return SDValue();
14616 }
14617
/// Combine a UINT_TO_FP node. Handles, in order: an undef operand, a constant
/// operand, switching to SINT_TO_FP when the sign bit is known zero, a scalar
/// setcc operand rewritten as select(cond, 1.0, 0.0), and the fptoui
/// round-trip handled by foldFPToIntToFP. Returns an empty SDValue when no
/// fold applies.
14618 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14619   SDValue N0 = N->getOperand(0);
14620   EVT VT = N->getValueType(0);
14621   EVT OpVT = N0.getValueType();
14622
14623   // [us]itofp(undef) = 0, because the result value is bounded.
14624   if (N0.isUndef())
14625     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14626
14627   // fold (uint_to_fp c1) -> c1fp
14628   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14629       // ...but only if the target supports immediate floating-point values
14630       (!LegalOperations ||
14631        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14632     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14633
14634   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14635   // but SINT_TO_FP is legal on this target, try to convert.
14636   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14637       hasOperation(ISD::SINT_TO_FP, OpVT)) {
14638     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14639     if (DAG.SignBitIsZero(N0))
14640       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14641   }
14642
14643   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
        // Unlike the signed visitor, no i1 check is made on the setcc type here.
14644   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14645       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14646     SDLoc DL(N);
14647     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14648                          DAG.getConstantFP(0.0, DL, VT));
14649   }
14650
14651   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14652     return FTrunc;
14653
14654   return SDValue();
14655 }
14656
14657 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
      // The fold is only done when the intermediate FP type has enough precision
      // to hold every value of the narrower of the two integer ranges. Returns an
      // empty SDValue otherwise, or when the operand is not an int-to-FP node.
14658 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14659   SDValue N0 = N->getOperand(0);
14660   EVT VT = N->getValueType(0);
14661
14662   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14663     return SDValue();
14664
14665   SDValue Src = N0.getOperand(0);
14666   EVT SrcVT = Src.getValueType();
14667   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14668   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14669
14670   // We can safely assume the conversion won't overflow the output range,
14671   // because (for example) (uint8_t)18293.f is undefined behavior.
14672
14673   // Since we can assume the conversion won't overflow, our decision as to
14674   // whether the input will fit in the float should depend on the minimum
14675   // of the input range and output range.
14676
14677   // This means this is also safe for a signed input and unsigned output, since
14678   // a negative input would lead to undefined behavior.
        // A signed type spends one bit on the sign, so it contributes one fewer
        // magnitude bit than its scalar width.
14679   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14680   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14681   unsigned ActualSize = std::min(InputSize, OutputSize);
14682   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14683
14684   // We can only fold away the float conversion if the input range can be
14685   // represented exactly in the float range.
14686   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
          // Widening: sign-extend only when both conversions are signed.
14687     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14688       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14689                                                        : ISD::ZERO_EXTEND;
14690       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14691     }
14692     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14693       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
          // Same scalar width: reuse the source bits unchanged.
14694     return DAG.getBitcast(VT, Src);
14695   }
14696   return SDValue();
14697 }
14698
/// Combine an FP_TO_SINT node: an undef operand yields undef, a constant
/// operand is re-requested from the DAG, and anything else is handed to
/// FoldIntToFPToInt, whose result (possibly empty) is returned as-is.
14699 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14700   SDValue N0 = N->getOperand(0);
14701   EVT VT = N->getValueType(0);
14702
14703   // fold (fp_to_sint undef) -> undef
14704   if (N0.isUndef())
14705     return DAG.getUNDEF(VT);
14706
14707   // fold (fp_to_sint c1fp) -> c1
14708   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14709     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14710
14711   return FoldIntToFPToInt(N, DAG);
14712 }
14713
/// Combine an FP_TO_UINT node: an undef operand yields undef, a constant
/// operand is re-requested from the DAG, and anything else is handed to
/// FoldIntToFPToInt, whose result (possibly empty) is returned as-is.
14714 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14715   SDValue N0 = N->getOperand(0);
14716   EVT VT = N->getValueType(0);
14717
14718   // fold (fp_to_uint undef) -> undef
14719   if (N0.isUndef())
14720     return DAG.getUNDEF(VT);
14721
14722   // fold (fp_to_uint c1fp) -> c1
14723   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14724     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14725
14726   return FoldIntToFPToInt(N, DAG);
14727 }
14728
/// Combine an FP_ROUND node. Operand 0 is the value being rounded; operand 1
/// is a constant that this code compares against 1 to recognise a
/// value-preserving truncation. Handles a constant operand, a round of an
/// extend back to the original type, nested rounds, rounding through a
/// single-use FCOPYSIGN, and the vselect/setcc size matching helper. Returns
/// an empty SDValue when no fold applies.
14729 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14730   SDValue N0 = N->getOperand(0);
14731   SDValue N1 = N->getOperand(1);
14732   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14733   EVT VT = N->getValueType(0);
14734
14735   // fold (fp_round c1fp) -> c1fp
14736   if (N0CFP)
14737     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14738
14739   // fold (fp_round (fp_extend x)) -> x
14740   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14741     return N0.getOperand(0);
14742
14743   // fold (fp_round (fp_round x)) -> (fp_round x)
14744   if (N0.getOpcode() == ISD::FP_ROUND) {
14745     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14746     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14747
14748     // Skip this folding if it results in an fp_round from f80 to f16.
14749     //
14750     // f80 to f16 always generates an expensive (and as yet, unimplemented)
14751     // libcall to __truncxfhf2 instead of selecting native f16 conversion
14752     // instructions from f32 or f64.  Moreover, the first (value-preserving)
14753     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14754     // x86.
14755     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14756       return SDValue();
14757
14758     // If the first fp_round isn't a value preserving truncation, it might
14759     // introduce a tie in the second fp_round, that wouldn't occur in the
14760     // single-step fp_round we want to fold to.
14761     // In other words, double rounding isn't the same as rounding.
14762     // Also, this is a value preserving truncation iff both fp_round's are.
14763     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14764       SDLoc DL(N);
14765       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14766                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14767     }
14768   }
14769
14770   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14771   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14772     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14773                               N0.getOperand(0), N1);
          // Queue the new inner round so it gets visited in its own right.
14774     AddToWorklist(Tmp.getNode());
14775     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14776                        Tmp, N0.getOperand(1));
14777   }
14778
14779   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14780     return NewVSel;
14781
14782   return SDValue();
14783 }
14784
/// Combine an FP_EXTEND node. Handles a constant operand, an FP16_TO_FP
/// operand, an extend of a value-preserving FP_ROUND, an extend of a normal
/// single-use load (turned into an extending load), and the vselect/setcc
/// size matching helper. Returns an empty SDValue when no fold applies; the
/// load fold returns SDValue(N, 0) after replacing uses itself.
14785 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14786   SDValue N0 = N->getOperand(0);
14787   EVT VT = N->getValueType(0);
14788
14789   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
14790   if (N->hasOneUse() &&
14791       N->use_begin()->getOpcode() == ISD::FP_ROUND)
14792     return SDValue();
14793
14794   // fold (fp_extend c1fp) -> c1fp
14795   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14796     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14797
14798   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14799   if (N0.getOpcode() == ISD::FP16_TO_FP &&
14800       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14801     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14802
14803   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14804   // value of X.
        // If X's type differs from VT, a single round or extend from X to VT is
        // emitted instead; the round reuses the inner node's flag operand.
14805   if (N0.getOpcode() == ISD::FP_ROUND
14806       && N0.getConstantOperandVal(1) == 1) {
14807     SDValue In = N0.getOperand(0);
14808     if (In.getValueType() == VT) return In;
14809     if (VT.bitsLT(In.getValueType()))
14810       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14811                          In, N0.getOperand(1));
14812     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14813   }
14814
14815   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14816   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14817        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14818     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14819     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14820                                      LN0->getChain(),
14821                                      LN0->getBasePtr(), N0.getValueType(),
14822                                      LN0->getMemOperand());
          // N itself becomes the extending load. The old load's first result is
          // replaced by an FP_ROUND (flag 1) of the extending load, and its second
          // result by ExtLoad.getValue(1) (presumably the chain - confirm).
14823     CombineTo(N, ExtLoad);
14824     CombineTo(N0.getNode(),
14825               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14826                           N0.getValueType(), ExtLoad,
14827                           DAG.getIntPtrConstant(1, SDLoc(N0))),
14828               ExtLoad.getValue(1));
14829     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14830   }
14831
14832   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14833     return NewVSel;
14834
14835   return SDValue();
14836 }
14837
/// Combine an FCEIL node. The only fold is for a constant (or constant
/// build-vector) operand, which is re-requested from the DAG; otherwise an
/// empty SDValue is returned.
14838 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14839   SDValue N0 = N->getOperand(0);
14840   EVT VT = N->getValueType(0);
14841
14842   // fold (fceil c1) -> fceil(c1)
14843   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14844     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14845
14846   return SDValue();
14847 }
14848
/// Combine an FTRUNC node: a constant operand is re-requested from the DAG,
/// and an operand that is itself one of the rounding operations listed in
/// the switch below is returned directly, since truncating it again is a
/// no-op. Returns an empty SDValue otherwise.
14849 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14850   SDValue N0 = N->getOperand(0);
14851   EVT VT = N->getValueType(0);
14852
14853   // fold (ftrunc c1) -> ftrunc(c1)
14854   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14855     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14856
14857   // fold ftrunc (known rounded int x) -> x
14858   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14859   // likely to be generated to extract integer from a rounded floating value.
14860   switch (N0.getOpcode()) {
14861   default: break;
14862   case ISD::FRINT:
14863   case ISD::FTRUNC:
14864   case ISD::FNEARBYINT:
14865   case ISD::FFLOOR:
14866   case ISD::FCEIL:
14867     return N0;
14868   }
14869
14870   return SDValue();
14871 }
14872
/// Combine an FFLOOR node. The only fold is for a constant (or constant
/// build-vector) operand, which is re-requested from the DAG; otherwise an
/// empty SDValue is returned.
14873 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14874   SDValue N0 = N->getOperand(0);
14875   EVT VT = N->getValueType(0);
14876
14877   // fold (ffloor c1) -> ffloor(c1)
14878   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14879     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14880
14881   return SDValue();
14882 }
14883
/// Combine an FNEG node. Tries, in order: a constant operand, a negated form
/// of the operand supplied by TLI.getNegatedExpression, -(X-Y) -> (Y-X) when
/// signed zeros may be ignored, and foldSignChangeInBitcast. Returns an empty
/// SDValue when no fold applies.
14884 SDValue DAGCombiner::visitFNEG(SDNode *N) {
14885   SDValue N0 = N->getOperand(0);
14886   EVT VT = N->getValueType(0);
14887   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14888
14889   // Constant fold FNEG.
14890   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14891     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14892
14893   if (SDValue NegN0 =
14894           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14895     return NegN0;
14896
14897   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14898   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14899   // know it was called from a context with a nsz flag if the input fsub does
14900   // not.
        // Hence the fold needs nsz (global option or this node's flag), and the
        // fsub must have no other user.
14901   if (N0.getOpcode() == ISD::FSUB &&
14902       (DAG.getTarget().Options.NoSignedZerosFPMath ||
14903        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14904     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14905                        N0.getOperand(0));
14906   }
14907
14908   if (SDValue Cast = foldSignChangeInBitcast(N))
14909     return Cast;
14910
14911   return SDValue();
14912 }
14913
/// Shared combine for FMINNUM, FMAXNUM, FMINIMUM and FMAXIMUM.
/// \param DAG the DAG used to build replacement nodes.
/// \param N   the min/max node; its opcode selects min vs. max and whether a
///            NaN operand propagates (FMINIMUM/FMAXIMUM) or is ignored.
/// \param Op  APFloat routine used to fold the node when both operands are
///            constants or constant splats.
/// \returns the folded value, a commuted node with the constant on the RHS, or
/// an empty SDValue when no fold applies.
14914 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14915                             APFloat (*Op)(const APFloat &, const APFloat &)) {
14916   SDValue N0 = N->getOperand(0);
14917   SDValue N1 = N->getOperand(1);
14918   EVT VT = N->getValueType(0);
14919   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14920   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14921   const SDNodeFlags Flags = N->getFlags();
14922   unsigned Opc = N->getOpcode();
14923   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14924   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14925   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14926
        // Both operands constant: fold with the supplied APFloat routine.
14927   if (N0CFP && N1CFP) {
14928     const APFloat &C0 = N0CFP->getValueAPF();
14929     const APFloat &C1 = N1CFP->getValueAPF();
14930     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14931   }
14932
14933   // Canonicalize to constant on RHS.
14934   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14935       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14936     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14937
14938   if (N1CFP) {
14939     const APFloat &AF = N1CFP->getValueAPF();
14940
14941     // minnum(X, nan) -> X
14942     // maxnum(X, nan) -> X
14943     // minimum(X, nan) -> nan
14944     // maximum(X, nan) -> nan
14945     if (AF.isNaN())
14946       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
14947
14948     // In the following folds, inf can be replaced with the largest finite
14949     // float, if the ninf flag is set.
14950     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
14951       // minnum(X, -inf) -> -inf
14952       // maxnum(X, +inf) -> +inf
14953       // minimum(X, -inf) -> -inf if nnan
14954       // maximum(X, +inf) -> +inf if nnan
            // (IsMin == AF.isNegative() means the constant is the extreme value in
            // the direction this operation selects.)
14955       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
14956         return N->getOperand(1);
14957
14958       // minnum(X, +inf) -> X if nnan
14959       // maxnum(X, -inf) -> X if nnan
14960       // minimum(X, +inf) -> X
14961       // maximum(X, -inf) -> X
14962       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
14963         return N->getOperand(0);
14964     }
14965   }
14966
14967   return SDValue();
14968 }
14969
/// Combine an FMINNUM node via visitFMinMax, using minnum for constant folding.
14970 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
14971   return visitFMinMax(DAG, N, minnum);
14972 }
14973
/// Combine an FMAXNUM node via visitFMinMax, using maxnum for constant folding.
14974 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
14975   return visitFMinMax(DAG, N, maxnum);
14976 }
14977
/// Combine an FMINIMUM node via visitFMinMax, using minimum for constant
/// folding.
14978 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
14979   return visitFMinMax(DAG, N, minimum);
14980 }
14981
/// Combine an FMAXIMUM node via visitFMinMax, using maximum for constant
/// folding.
14982 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
14983   return visitFMinMax(DAG, N, maximum);
14984 }
14985
/// Combine an FABS node: a constant operand is re-requested from the DAG, a
/// nested FABS is collapsed, a sign-only operation on the operand (FNEG or
/// FCOPYSIGN) is looked through, and foldSignChangeInBitcast is tried last.
/// Returns an empty SDValue when no fold applies.
14986 SDValue DAGCombiner::visitFABS(SDNode *N) {
14987   SDValue N0 = N->getOperand(0);
14988   EVT VT = N->getValueType(0);
14989
14990   // fold (fabs c1) -> fabs(c1)
14991   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14992     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14993
14994   // fold (fabs (fabs x)) -> (fabs x)
14995   if (N0.getOpcode() == ISD::FABS)
14996     return N->getOperand(0);
14997
14998   // fold (fabs (fneg x)) -> (fabs x)
14999   // fold (fabs (fcopysign x, y)) -> (fabs x)
15000   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15001     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15002
15003   if (SDValue Cast = foldSignChangeInBitcast(N))
15004     return Cast;
15005
15006   return SDValue();
15007 }
15008
/// Combine a BRCOND node. Operand 0 is the chain, operand 1 the condition and
/// operand 2 is forwarded unchanged as the last operand of any replacement
/// branch (presumably the destination block - confirm). Strips a single-use
/// FREEZE from the condition, turns a SETCC condition into BR_CC when the
/// target supports it, and otherwise tries rebuildSetCC on a single-use
/// condition. Returns an empty SDValue when no fold applies.
15009 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15010   SDValue Chain = N->getOperand(0);
15011   SDValue N1 = N->getOperand(1);
15012   SDValue N2 = N->getOperand(2);
15013
15014   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15015   // nondeterministic jumps).
15016   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15017     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15018                        N1->getOperand(0), N2);
15019   }
15020
15021   // If N is a constant we could fold this into a fallthrough or unconditional
15022   // branch. However that doesn't happen very often in normal code, because
15023   // Instcombine/SimplifyCFG should have handled the available opportunities.
15024   // If we did this folding here, it would be necessary to update the
15025   // MachineBasicBlock CFG, which is awkward.
15026
15027   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15028   // on the target.
        // BR_CC operand order: chain, condition code, LHS, RHS, destination.
15029   if (N1.getOpcode() == ISD::SETCC &&
15030       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15031                                    N1.getOperand(0).getValueType())) {
15032     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15033                        Chain, N1.getOperand(2),
15034                        N1.getOperand(0), N1.getOperand(1), N2);
15035   }
15036
15037   if (N1.hasOneUse()) {
15038     // rebuildSetCC calls visitXor which may change the Chain when there is a
15039     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15040     HandleSDNode ChainHandle(Chain);
15041     if (SDValue NewN1 = rebuildSetCC(N1))
15042       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15043                          ChainHandle.getValue(), NewN1, N2);
15044   }
15045
15046   return SDValue();
15047 }
15048
/// Try to rewrite a branch condition \p N as a SETCC. Two shapes are handled:
///   - (srl (and X, Pow2), log2(Pow2)), optionally behind a truncate of a
///     single-use srl, becomes setcc ne (and X, Pow2), 0;
///   - (xor X, Y), after simplification through visitXOR, becomes
///     setcc ne X, Y, or setcc eq X, Y for a negated single-use i1 xor.
/// Returns the replacement value, or an empty SDValue when nothing matches.
/// If the xor simplifies to a non-xor value, that value is returned as-is.
15049 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15050   if (N.getOpcode() == ISD::SRL ||
15051       (N.getOpcode() == ISD::TRUNCATE &&
15052        (N.getOperand(0).hasOneUse() &&
15053         N.getOperand(0).getOpcode() == ISD::SRL))) {
15054     // Look past the truncate.
15055     if (N.getOpcode() == ISD::TRUNCATE)
15056       N = N.getOperand(0);
15057
15058     // Match this pattern so that we can generate simpler code:
15059     //
15060     //   %a = ...
15061     //   %b = and i32 %a, 2
15062     //   %c = srl i32 %b, 1
15063     //   brcond i32 %c ...
15064     //
15065     // into
15066     //
15067     //   %a = ...
15068     //   %b = and i32 %a, 2
15069     //   %c = setcc ne %b, 0
15070     //   brcond %c ...
15071     //
15072     // This applies only when the AND constant value has one bit set and the
15073     // SRL constant is equal to the log2 of the AND constant. The back-end is
15074     // smart enough to convert the result into a TEST/JMP sequence.
15075     SDValue Op0 = N.getOperand(0);
15076     SDValue Op1 = N.getOperand(1);
15077
15078     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15079       SDValue AndOp1 = Op0.getOperand(1);
15080
15081       if (AndOp1.getOpcode() == ISD::Constant) {
15082         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15083
15084         if (AndConst.isPowerOf2() &&
15085             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15086           SDLoc DL(N);
15087           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15088                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15089                               ISD::SETNE);
15090         }
15091       }
15092     }
15093   }
15094
15095   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15096   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15097   if (N.getOpcode() == ISD::XOR) {
15098     // Because we may call this on a speculatively constructed
15099     // SimplifiedSetCC Node, we need to simplify this node first.
15100     // Ideally this should be folded into SimplifySetCC and not
15101     // here. For now, grab a handle to N so we don't lose it from
15102     // replacements internal to the visit.
15103     HandleSDNode XORHandle(N);
15104     while (N.getOpcode() == ISD::XOR) {
15105       SDValue Tmp = visitXOR(N.getNode());
15106       // No simplification done.
15107       if (!Tmp.getNode())
15108         break;
15109       // Returning N signals an in-visit replacement that may have
15110       // invalidated N. Grab the value from the handle.
15111       if (Tmp.getNode() == N.getNode())
15112         N = XORHandle.getValue();
15113       else // Node simplified. Try simplifying again.
15114         N = Tmp;
15115     }
15116
          // The xor simplified into something else; hand that back unchanged.
15117     if (N.getOpcode() != ISD::XOR)
15118       return N;
15119
15120     SDValue Op0 = N->getOperand(0);
15121     SDValue Op1 = N->getOperand(1);
15122
15123     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15124       bool Equal = false;
15125       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15126       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15127           Op0.getValueType() == MVT::i1) {
15128         N = Op0;
15129         Op0 = N->getOperand(0);
15130         Op1 = N->getOperand(1);
15131         Equal = true;
15132       }
15133
15134       EVT SetCCVT = N.getValueType();
15135       if (LegalTypes)
15136         SetCCVT = getSetCCResultType(SetCCVT);
15137       // Replace the uses of XOR with SETCC
15138       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15139                           Equal ? ISD::SETEQ : ISD::SETNE);
15140     }
15141   }
15142
15143   return SDValue();
15144 }
15145
15146 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15147 //
      // Runs SimplifySetCC on the comparison and, if the result is itself a
      // SETCC, rebuilds the BR_CC from that node's operands with the chain and
      // destination kept. Any other outcome returns an empty SDValue.
15148 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15149   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15150   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15151
15152   // If N is a constant we could fold this into a fallthrough or unconditional
15153   // branch. However that doesn't happen very often in normal code, because
15154   // Instcombine/SimplifyCFG should have handled the available opportunities.
15155   // If we did this folding here, it would be necessary to update the
15156   // MachineBasicBlock CFG, which is awkward.
15157
15158   // Use SimplifySetCC to simplify SETCC's.
15159   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15160                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15161                                false);
        // Queue whatever SimplifySetCC produced, even if it is not used below.
15162   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15163
15164   // fold to a simpler setcc
15165   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15166     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15167                        N->getOperand(0), Simp.getOperand(2),
15168                        Simp.getOperand(0), Simp.getOperand(1),
15169                        N->getOperand(4));
15170
15171   return SDValue();
15172 }
15173
/// Classify \p N as a load, store, masked load or masked store that is a
/// candidate for indexed addressing, and extract its base pointer.
/// \param N        node to inspect.
/// \param Inc      indexed mode queried as the incrementing form.
/// \param Dec      indexed mode queried as the decrementing form.
/// \param IsLoad   only ever written as false (store paths); callers must
///                 initialise it to true beforehand.
/// \param IsMasked only ever written as true (masked paths); callers must
///                 initialise it to false beforehand.
/// \param Ptr      receives the base pointer on success.
/// \param TLI      target hooks used for the legality queries.
/// \returns false if \p N is none of the four node kinds, is already indexed,
/// or the target supports neither \p Inc nor \p Dec for its memory type.
15174 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15175                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15176                                      const TargetLowering &TLI) {
15177   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15178     if (LD->isIndexed())
15179       return false;
15180     EVT VT = LD->getMemoryVT();
15181     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15182       return false;
15183     Ptr = LD->getBasePtr();
15184   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15185     if (ST->isIndexed())
15186       return false;
15187     EVT VT = ST->getMemoryVT();
15188     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15189       return false;
15190     Ptr = ST->getBasePtr();
15191     IsLoad = false;
15192   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15193     if (LD->isIndexed())
15194       return false;
15195     EVT VT = LD->getMemoryVT();
15196     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15197         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15198       return false;
15199     Ptr = LD->getBasePtr();
15200     IsMasked = true;
15201   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15202     if (ST->isIndexed())
15203       return false;
15204     EVT VT = ST->getMemoryVT();
15205     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15206         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15207       return false;
15208     Ptr = ST->getBasePtr();
15209     IsLoad = false;
15210     IsMasked = true;
15211   } else {
15212     return false;
15213   }
15214   return true;
15215 }
15216
15217 /// Try turning a load/store into a pre-indexed load/store when the base
15218 /// pointer is an add or subtract and it has other uses besides the load/store.
15219 /// After the transformation, the new indexed load/store has effectively folded
15220 /// the add/subtract in and all of its other uses are redirected to the
15221 /// new load/store.
15222 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15223   if (Level < AfterLegalizeDAG)
15224     return false;
15225
15226   bool IsLoad = true;
15227   bool IsMasked = false;
15228   SDValue Ptr;
15229   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15230                                 Ptr, TLI))
15231     return false;
15232
15233   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15234   // out.  There is no reason to make this a preinc/predec.
15235   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15236       Ptr.getNode()->hasOneUse())
15237     return false;
15238
15239   // Ask the target to do addressing mode selection.
15240   SDValue BasePtr;
15241   SDValue Offset;
15242   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15243   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15244     return false;
15245
15246   // Backends without true r+i pre-indexed forms may need to pass a
15247   // constant base with a variable offset so that constant coercion
15248   // will work with the patterns in canonical form.
15249   bool Swapped = false;
15250   if (isa<ConstantSDNode>(BasePtr)) {
15251     std::swap(BasePtr, Offset);
15252     Swapped = true;
15253   }
15254
15255   // Don't create a indexed load / store with zero offset.
15256   if (isNullConstant(Offset))
15257     return false;
15258
15259   // Try turning it into a pre-indexed load / store except when:
15260   // 1) The new base ptr is a frame index.
15261   // 2) If N is a store and the new base ptr is either the same as or is a
15262   //    predecessor of the value being stored.
15263   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15264   //    that would create a cycle.
15265   // 4) All uses are load / store ops that use it as old base ptr.
15266
15267   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15268   // (plus the implicit offset) to a register to preinc anyway.
15269   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15270     return false;
15271
15272   // Check #2.
15273   if (!IsLoad) {
15274     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15275                            : cast<StoreSDNode>(N)->getValue();
15276
15277     // Would require a copy.
15278     if (Val == BasePtr)
15279       return false;
15280
15281     // Would create a cycle.
15282     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15283       return false;
15284   }
15285
15286   // Caches for hasPredecessorHelper.
15287   SmallPtrSet<const SDNode *, 32> Visited;
15288   SmallVector<const SDNode *, 16> Worklist;
15289   Worklist.push_back(N);
15290
15291   // If the offset is a constant, there may be other adds of constants that
15292   // can be folded with this one. We should do this to avoid having to keep
15293   // a copy of the original base pointer.
15294   SmallVector<SDNode *, 16> OtherUses;
15295   if (isa<ConstantSDNode>(Offset))
15296     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15297                               UE = BasePtr.getNode()->use_end();
15298          UI != UE; ++UI) {
15299       SDUse &Use = UI.getUse();
15300       // Skip the use that is Ptr and uses of other results from BasePtr's
15301       // node (important for nodes that return multiple results).
15302       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15303         continue;
15304
15305       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15306         continue;
15307
15308       if (Use.getUser()->getOpcode() != ISD::ADD &&
15309           Use.getUser()->getOpcode() != ISD::SUB) {
15310         OtherUses.clear();
15311         break;
15312       }
15313
15314       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15315       if (!isa<ConstantSDNode>(Op1)) {
15316         OtherUses.clear();
15317         break;
15318       }
15319
15320       // FIXME: In some cases, we can be smarter about this.
15321       if (Op1.getValueType() != Offset.getValueType()) {
15322         OtherUses.clear();
15323         break;
15324       }
15325
15326       OtherUses.push_back(Use.getUser());
15327     }
15328
15329   if (Swapped)
15330     std::swap(BasePtr, Offset);
15331
15332   // Now check for #3 and #4.
15333   bool RealUse = false;
15334
15335   for (SDNode *Use : Ptr.getNode()->uses()) {
15336     if (Use == N)
15337       continue;
15338     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15339       return false;
15340
15341     // If Ptr may be folded in addressing mode of other use, then it's
15342     // not profitable to do this transformation.
15343     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15344       RealUse = true;
15345   }
15346
15347   if (!RealUse)
15348     return false;
15349
15350   SDValue Result;
15351   if (!IsMasked) {
15352     if (IsLoad)
15353       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15354     else
15355       Result =
15356           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15357   } else {
15358     if (IsLoad)
15359       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15360                                         Offset, AM);
15361     else
15362       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15363                                          Offset, AM);
15364   }
15365   ++PreIndexedNodes;
15366   ++NodesCombined;
15367   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15368              Result.getNode()->dump(&DAG); dbgs() << '\n');
15369   WorklistRemover DeadNodes(*this);
15370   if (IsLoad) {
15371     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15372     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15373   } else {
15374     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15375   }
15376
15377   // Finally, since the node is now dead, remove it from the graph.
15378   deleteAndRecombine(N);
15379
15380   if (Swapped)
15381     std::swap(BasePtr, Offset);
15382
15383   // Replace other uses of BasePtr that can be updated to use Ptr
15384   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15385     unsigned OffsetIdx = 1;
15386     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15387       OffsetIdx = 0;
15388     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15389            BasePtr.getNode() && "Expected BasePtr operand");
15390
15391     // We need to replace ptr0 in the following expression:
15392     //   x0 * offset0 + y0 * ptr0 = t0
15393     // knowing that
15394     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15395     //
15396     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15397     // indexed load/store and the expression that needs to be re-written.
15398     //
15399     // Therefore, we have:
15400     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15401
15402     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15403     const APInt &Offset0 = CN->getAPIntValue();
15404     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15405     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15406     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15407     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15408     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15409
15410     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15411
15412     APInt CNV = Offset0;
15413     if (X0 < 0) CNV = -CNV;
15414     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15415     else CNV = CNV - Offset1;
15416
15417     SDLoc DL(OtherUses[i]);
15418
15419     // We can now generate the new expression.
15420     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15421     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15422
15423     SDValue NewUse = DAG.getNode(Opcode,
15424                                  DL,
15425                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15426     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15427     deleteAndRecombine(OtherUses[i]);
15428   }
15429
15430   // Replace the uses of Ptr with uses of the updated base value.
15431   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15432   deleteAndRecombine(Ptr.getNode());
15433   AddToWorklist(Result.getNode());
15434
15435   return true;
15436 }
15437
15438 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15439                                    SDValue &BasePtr, SDValue &Offset,
15440                                    ISD::MemIndexedMode &AM,
15441                                    SelectionDAG &DAG,
15442                                    const TargetLowering &TLI) {
15443   if (PtrUse == N ||
15444       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15445     return false;
15446
15447   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15448     return false;
15449
15450   // Don't create a indexed load / store with zero offset.
15451   if (isNullConstant(Offset))
15452     return false;
15453
15454   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15455     return false;
15456
15457   SmallPtrSet<const SDNode *, 32> Visited;
15458   for (SDNode *Use : BasePtr.getNode()->uses()) {
15459     if (Use == Ptr.getNode())
15460       continue;
15461
15462     // No if there's a later user which could perform the index instead.
15463     if (isa<MemSDNode>(Use)) {
15464       bool IsLoad = true;
15465       bool IsMasked = false;
15466       SDValue OtherPtr;
15467       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15468                                    IsMasked, OtherPtr, TLI)) {
15469         SmallVector<const SDNode *, 2> Worklist;
15470         Worklist.push_back(Use);
15471         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15472           return false;
15473       }
15474     }
15475
15476     // If all the uses are load / store addresses, then don't do the
15477     // transformation.
15478     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15479       for (SDNode *UseUse : Use->uses())
15480         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15481           return false;
15482     }
15483   }
15484   return true;
15485 }
15486
15487 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15488                                          bool &IsMasked, SDValue &Ptr,
15489                                          SDValue &BasePtr, SDValue &Offset,
15490                                          ISD::MemIndexedMode &AM,
15491                                          SelectionDAG &DAG,
15492                                          const TargetLowering &TLI) {
15493   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15494                                 IsMasked, Ptr, TLI) ||
15495       Ptr.getNode()->hasOneUse())
15496     return nullptr;
15497
15498   // Try turning it into a post-indexed load / store except when
15499   // 1) All uses are load / store ops that use it as base ptr (and
15500   //    it may be folded as addressing mmode).
15501   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15502   //    nor a successor of N. Otherwise, if Op is folded that would
15503   //    create a cycle.
15504   for (SDNode *Op : Ptr->uses()) {
15505     // Check for #1.
15506     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15507       continue;
15508
15509     // Check for #2.
15510     SmallPtrSet<const SDNode *, 32> Visited;
15511     SmallVector<const SDNode *, 8> Worklist;
15512     // Ptr is predecessor to both N and Op.
15513     Visited.insert(Ptr.getNode());
15514     Worklist.push_back(N);
15515     Worklist.push_back(Op);
15516     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15517         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15518       return Op;
15519   }
15520   return nullptr;
15521 }
15522
15523 /// Try to combine a load/store with a add/sub of the base pointer node into a
15524 /// post-indexed load/store. The transformation folded the add/subtract into the
15525 /// new indexed load/store effectively and all of its uses are redirected to the
15526 /// new load/store.
15527 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15528   if (Level < AfterLegalizeDAG)
15529     return false;
15530
15531   bool IsLoad = true;
15532   bool IsMasked = false;
15533   SDValue Ptr;
15534   SDValue BasePtr;
15535   SDValue Offset;
15536   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15537   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15538                                          Offset, AM, DAG, TLI);
15539   if (!Op)
15540     return false;
15541
15542   SDValue Result;
15543   if (!IsMasked)
15544     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15545                                          Offset, AM)
15546                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15547                                           BasePtr, Offset, AM);
15548   else
15549     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15550                                                BasePtr, Offset, AM)
15551                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15552                                                 BasePtr, Offset, AM);
15553   ++PostIndexedNodes;
15554   ++NodesCombined;
15555   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15556              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15557              dbgs() << '\n');
15558   WorklistRemover DeadNodes(*this);
15559   if (IsLoad) {
15560     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15561     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15562   } else {
15563     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15564   }
15565
15566   // Finally, since the node is now dead, remove it from the graph.
15567   deleteAndRecombine(N);
15568
15569   // Replace the uses of Use with uses of the updated base value.
15570   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15571                                 Result.getValue(IsLoad ? 1 : 0));
15572   deleteAndRecombine(Op);
15573   return true;
15574 }
15575
15576 /// Return the base-pointer arithmetic from an indexed \p LD.
15577 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15578   ISD::MemIndexedMode AM = LD->getAddressingMode();
15579   assert(AM != ISD::UNINDEXED);
15580   SDValue BP = LD->getOperand(1);
15581   SDValue Inc = LD->getOperand(2);
15582
15583   // Some backends use TargetConstants for load offsets, but don't expect
15584   // TargetConstants in general ADD nodes. We can convert these constants into
15585   // regular Constants (if the constant is not opaque).
15586   assert((Inc.getOpcode() != ISD::TargetConstant ||
15587           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15588          "Cannot split out indexing using opaque target constants");
15589   if (Inc.getOpcode() == ISD::TargetConstant) {
15590     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15591     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15592                           ConstInc->getValueType(0));
15593   }
15594
15595   unsigned Opc =
15596       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15597   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15598 }
15599
15600 static inline ElementCount numVectorEltsOrZero(EVT T) {
15601   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15602 }
15603
15604 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15605   Val = ST->getValue();
15606   EVT STType = Val.getValueType();
15607   EVT STMemType = ST->getMemoryVT();
15608   if (STType == STMemType)
15609     return true;
15610   if (isTypeLegal(STMemType))
15611     return false; // fail.
15612   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15613       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15614     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15615     return true;
15616   }
15617   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15618       STType.isInteger() && STMemType.isInteger()) {
15619     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15620     return true;
15621   }
15622   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15623     Val = DAG.getBitcast(STMemType, Val);
15624     return true;
15625   }
15626   return false; // fail.
15627 }
15628
15629 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15630   EVT LDMemType = LD->getMemoryVT();
15631   EVT LDType = LD->getValueType(0);
15632   assert(Val.getValueType() == LDMemType &&
15633          "Attempting to extend value of non-matching type");
15634   if (LDType == LDMemType)
15635     return true;
15636   if (LDMemType.isInteger() && LDType.isInteger()) {
15637     switch (LD->getExtensionType()) {
15638     case ISD::NON_EXTLOAD:
15639       Val = DAG.getBitcast(LDType, Val);
15640       return true;
15641     case ISD::EXTLOAD:
15642       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15643       return true;
15644     case ISD::SEXTLOAD:
15645       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15646       return true;
15647     case ISD::ZEXTLOAD:
15648       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15649       return true;
15650     }
15651   }
15652   return false;
15653 }
15654
15655 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15656   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15657     return SDValue();
15658   SDValue Chain = LD->getOperand(0);
15659   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15660   // TODO: Relax this restriction for unordered atomics (see D66309)
15661   if (!ST || !ST->isSimple())
15662     return SDValue();
15663
15664   EVT LDType = LD->getValueType(0);
15665   EVT LDMemType = LD->getMemoryVT();
15666   EVT STMemType = ST->getMemoryVT();
15667   EVT STType = ST->getValue().getValueType();
15668
15669   // There are two cases to consider here:
15670   //  1. The store is fixed width and the load is scalable. In this case we
15671   //     don't know at compile time if the store completely envelops the load
15672   //     so we abandon the optimisation.
15673   //  2. The store is scalable and the load is fixed width. We could
15674   //     potentially support a limited number of cases here, but there has been
15675   //     no cost-benefit analysis to prove it's worth it.
15676   bool LdStScalable = LDMemType.isScalableVector();
15677   if (LdStScalable != STMemType.isScalableVector())
15678     return SDValue();
15679
15680   // If we are dealing with scalable vectors on a big endian platform the
15681   // calculation of offsets below becomes trickier, since we do not know at
15682   // compile time the absolute size of the vector. Until we've done more
15683   // analysis on big-endian platforms it seems better to bail out for now.
15684   if (LdStScalable && DAG.getDataLayout().isBigEndian())
15685     return SDValue();
15686
15687   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15688   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15689   int64_t Offset;
15690   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15691     return SDValue();
15692
15693   // Normalize for Endianness. After this Offset=0 will denote that the least
15694   // significant bit in the loaded value maps to the least significant bit in
15695   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
15696   // n:th least significant byte of the stored value.
15697   if (DAG.getDataLayout().isBigEndian())
15698     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15699               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15700                  8 -
15701              Offset;
15702
15703   // Check that the stored value cover all bits that are loaded.
15704   bool STCoversLD;
15705
15706   TypeSize LdMemSize = LDMemType.getSizeInBits();
15707   TypeSize StMemSize = STMemType.getSizeInBits();
15708   if (LdStScalable)
15709     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15710   else
15711     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15712                                    StMemSize.getFixedSize());
15713
15714   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15715     if (LD->isIndexed()) {
15716       // Cannot handle opaque target constants and we must respect the user's
15717       // request not to split indexes from loads.
15718       if (!canSplitIdx(LD))
15719         return SDValue();
15720       SDValue Idx = SplitIndexingFromLoad(LD);
15721       SDValue Ops[] = {Val, Idx, Chain};
15722       return CombineTo(LD, Ops, 3);
15723     }
15724     return CombineTo(LD, Val, Chain);
15725   };
15726
15727   if (!STCoversLD)
15728     return SDValue();
15729
15730   // Memory as copy space (potentially masked).
15731   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15732     // Simple case: Direct non-truncating forwarding
15733     if (LDType.getSizeInBits() == LdMemSize)
15734       return ReplaceLd(LD, ST->getValue(), Chain);
15735     // Can we model the truncate and extension with an and mask?
15736     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15737         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15738       // Mask to size of LDMemType
15739       auto Mask =
15740           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15741                                                StMemSize.getFixedSize()),
15742                           SDLoc(ST), STType);
15743       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15744       return ReplaceLd(LD, Val, Chain);
15745     }
15746   }
15747
15748   // TODO: Deal with nonzero offset.
15749   if (LD->getBasePtr().isUndef() || Offset != 0)
15750     return SDValue();
15751   // Model necessary truncations / extenstions.
15752   SDValue Val;
15753   // Truncate Value To Stored Memory Size.
15754   do {
15755     if (!getTruncatedStoreValue(ST, Val))
15756       continue;
15757     if (!isTypeLegal(LDMemType))
15758       continue;
15759     if (STMemType != LDMemType) {
15760       // TODO: Support vectors? This requires extract_subvector/bitcast.
15761       if (!STMemType.isVector() && !LDMemType.isVector() &&
15762           STMemType.isInteger() && LDMemType.isInteger())
15763         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15764       else
15765         continue;
15766     }
15767     if (!extendLoadedValueToExtension(LD, Val))
15768       continue;
15769     return ReplaceLd(LD, Val, Chain);
15770   } while (false);
15771
15772   // On failure, cleanup dead nodes we may have created.
15773   if (Val->use_empty())
15774     deleteAndRecombine(Val.getNode());
15775   return SDValue();
15776 }
15777
15778 SDValue DAGCombiner::visitLOAD(SDNode *N) {
15779   LoadSDNode *LD  = cast<LoadSDNode>(N);
15780   SDValue Chain = LD->getChain();
15781   SDValue Ptr   = LD->getBasePtr();
15782
15783   // If load is not volatile and there are no uses of the loaded value (and
15784   // the updated indexed value in case of indexed loads), change uses of the
15785   // chain value into uses of the chain input (i.e. delete the dead load).
15786   // TODO: Allow this for unordered atomics (see D66309)
15787   if (LD->isSimple()) {
15788     if (N->getValueType(1) == MVT::Other) {
15789       // Unindexed loads.
15790       if (!N->hasAnyUseOfValue(0)) {
15791         // It's not safe to use the two value CombineTo variant here. e.g.
15792         // v1, chain2 = load chain1, loc
15793         // v2, chain3 = load chain2, loc
15794         // v3         = add v2, c
15795         // Now we replace use of chain2 with chain1.  This makes the second load
15796         // isomorphic to the one we are deleting, and thus makes this load live.
15797         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15798                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15799                    dbgs() << "\n");
15800         WorklistRemover DeadNodes(*this);
15801         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15802         AddUsersToWorklist(Chain.getNode());
15803         if (N->use_empty())
15804           deleteAndRecombine(N);
15805
15806         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15807       }
15808     } else {
15809       // Indexed loads.
15810       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15811
15812       // If this load has an opaque TargetConstant offset, then we cannot split
15813       // the indexing into an add/sub directly (that TargetConstant may not be
15814       // valid for a different type of node, and we cannot convert an opaque
15815       // target constant into a regular constant).
15816       bool CanSplitIdx = canSplitIdx(LD);
15817
15818       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15819         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15820         SDValue Index;
15821         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15822           Index = SplitIndexingFromLoad(LD);
15823           // Try to fold the base pointer arithmetic into subsequent loads and
15824           // stores.
15825           AddUsersToWorklist(N);
15826         } else
15827           Index = DAG.getUNDEF(N->getValueType(1));
15828         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15829                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15830                    dbgs() << " and 2 other values\n");
15831         WorklistRemover DeadNodes(*this);
15832         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15833         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15834         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15835         deleteAndRecombine(N);
15836         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15837       }
15838     }
15839   }
15840
15841   // If this load is directly stored, replace the load value with the stored
15842   // value.
15843   if (auto V = ForwardStoreValueToDirectLoad(LD))
15844     return V;
15845
15846   // Try to infer better alignment information than the load already has.
15847   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15848     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15849       if (*Alignment > LD->getAlign() &&
15850           isAligned(*Alignment, LD->getSrcValueOffset())) {
15851         SDValue NewLoad = DAG.getExtLoad(
15852             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15853             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15854             LD->getMemOperand()->getFlags(), LD->getAAInfo());
15855         // NewLoad will always be N as we are only refining the alignment
15856         assert(NewLoad.getNode() == N);
15857         (void)NewLoad;
15858       }
15859     }
15860   }
15861
15862   if (LD->isUnindexed()) {
15863     // Walk up chain skipping non-aliasing memory nodes.
15864     SDValue BetterChain = FindBetterChain(LD, Chain);
15865
15866     // If there is a better chain.
15867     if (Chain != BetterChain) {
15868       SDValue ReplLoad;
15869
15870       // Replace the chain to void dependency.
15871       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15872         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15873                                BetterChain, Ptr, LD->getMemOperand());
15874       } else {
15875         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15876                                   LD->getValueType(0),
15877                                   BetterChain, Ptr, LD->getMemoryVT(),
15878                                   LD->getMemOperand());
15879       }
15880
15881       // Create token factor to keep old chain connected.
15882       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15883                                   MVT::Other, Chain, ReplLoad.getValue(1));
15884
15885       // Replace uses with load result and token factor
15886       return CombineTo(N, ReplLoad.getValue(0), Token);
15887     }
15888   }
15889
15890   // Try transforming N to an indexed load.
15891   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15892     return SDValue(N, 0);
15893
15894   // Try to slice up N to more direct loads if the slices are mapped to
15895   // different register banks or pairing can take place.
15896   if (SliceUpLoad(N))
15897     return SDValue(N, 0);
15898
15899   return SDValue();
15900 }
15901
15902 namespace {
15903
15904 /// Helper structure used to slice a load in smaller loads.
15905 /// Basically a slice is obtained from the following sequence:
15906 /// Origin = load Ty1, Base
15907 /// Shift = srl Ty1 Origin, CstTy Amount
15908 /// Inst = trunc Shift to Ty2
15909 ///
15910 /// Then, it will be rewritten into:
15911 /// Slice = load SliceTy, Base + SliceOffset
15912 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15913 ///
15914 /// SliceTy is deduced from the number of bits that are actually used to
15915 /// build Inst.
15916 struct LoadedSlice {
15917   /// Helper structure used to compute the cost of a slice.
15918   struct Cost {
15919     /// Are we optimizing for code size.
15920     bool ForCodeSize = false;
15921
15922     /// Various cost.
15923     unsigned Loads = 0;
15924     unsigned Truncates = 0;
15925     unsigned CrossRegisterBanksCopies = 0;
15926     unsigned ZExts = 0;
15927     unsigned Shift = 0;
15928
15929     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15930
15931     /// Get the cost of one isolated slice.
15932     Cost(const LoadedSlice &LS, bool ForCodeSize)
15933         : ForCodeSize(ForCodeSize), Loads(1) {
15934       EVT TruncType = LS.Inst->getValueType(0);
15935       EVT LoadedType = LS.getLoadedType();
15936       if (TruncType != LoadedType &&
15937           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15938         ZExts = 1;
15939     }
15940
15941     /// Account for slicing gain in the current cost.
15942     /// Slicing provide a few gains like removing a shift or a
15943     /// truncate. This method allows to grow the cost of the original
15944     /// load with the gain from this slice.
15945     void addSliceGain(const LoadedSlice &LS) {
15946       // Each slice saves a truncate.
15947       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
15948       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
15949                               LS.Inst->getValueType(0)))
15950         ++Truncates;
15951       // If there is a shift amount, this slice gets rid of it.
15952       if (LS.Shift)
15953         ++Shift;
15954       // If this slice can merge a cross register bank copy, account for it.
15955       if (LS.canMergeExpensiveCrossRegisterBankCopy())
15956         ++CrossRegisterBanksCopies;
15957     }
15958
15959     Cost &operator+=(const Cost &RHS) {
15960       Loads += RHS.Loads;
15961       Truncates += RHS.Truncates;
15962       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15963       ZExts += RHS.ZExts;
15964       Shift += RHS.Shift;
15965       return *this;
15966     }
15967
15968     bool operator==(const Cost &RHS) const {
15969       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15970              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15971              ZExts == RHS.ZExts && Shift == RHS.Shift;
15972     }
15973
15974     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15975
15976     bool operator<(const Cost &RHS) const {
15977       // Assume cross register banks copies are as expensive as loads.
15978       // FIXME: Do we want some more target hooks?
15979       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15980       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15981       // Unless we are optimizing for code size, consider the
15982       // expensive operation first.
15983       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15984         return ExpensiveOpsLHS < ExpensiveOpsRHS;
15985       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15986              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15987     }
15988
15989     bool operator>(const Cost &RHS) const { return RHS < *this; }
15990
15991     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15992
15993     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15994   };
15995
15996   // The last instruction that represent the slice. This should be a
15997   // truncate instruction.
15998   SDNode *Inst;
15999
16000   // The original load instruction.
16001   LoadSDNode *Origin;
16002
16003   // The right shift amount in bits from the original load.
16004   unsigned Shift;
16005
16006   // The DAG from which Origin came from.
16007   // This is used to get some contextual information about legal types, etc.
16008   SelectionDAG *DAG;
16009
16010   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16011               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16012       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16013
16014   /// Get the bits used in a chunk of bits \p BitWidth large.
16015   /// \return Result is \p BitWidth and has used bits set to 1 and
16016   ///         not used bits set to 0.
16017   APInt getUsedBits() const {
16018     // Reproduce the trunc(lshr) sequence:
16019     // - Start from the truncated value.
16020     // - Zero extend to the desired bit width.
16021     // - Shift left.
16022     assert(Origin && "No original load to compare against.");
16023     unsigned BitWidth = Origin->getValueSizeInBits(0);
16024     assert(Inst && "This slice is not bound to an instruction");
16025     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16026            "Extracted slice is bigger than the whole type!");
16027     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16028     UsedBits.setAllBits();
16029     UsedBits = UsedBits.zext(BitWidth);
16030     UsedBits <<= Shift;
16031     return UsedBits;
16032   }
16033
16034   /// Get the size of the slice to be loaded in bytes.
16035   unsigned getLoadedSize() const {
16036     unsigned SliceSize = getUsedBits().countPopulation();
16037     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16038     return SliceSize / 8;
16039   }
16040
16041   /// Get the type that will be loaded for this slice.
16042   /// Note: This may not be the final type for the slice.
16043   EVT getLoadedType() const {
16044     assert(DAG && "Missing context");
16045     LLVMContext &Ctxt = *DAG->getContext();
16046     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16047   }
16048
16049   /// Get the alignment of the load used for this slice.
16050   Align getAlign() const {
16051     Align Alignment = Origin->getAlign();
16052     uint64_t Offset = getOffsetFromBase();
16053     if (Offset != 0)
16054       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16055     return Alignment;
16056   }
16057
16058   /// Check if this slice can be rewritten with legal operations.
16059   bool isLegal() const {
16060     // An invalid slice is not legal.
16061     if (!Origin || !Inst || !DAG)
16062       return false;
16063
16064     // Offsets are for indexed load only, we do not handle that.
16065     if (!Origin->getOffset().isUndef())
16066       return false;
16067
16068     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16069
16070     // Check that the type is legal.
16071     EVT SliceType = getLoadedType();
16072     if (!TLI.isTypeLegal(SliceType))
16073       return false;
16074
16075     // Check that the load is legal for this type.
16076     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16077       return false;
16078
16079     // Check that the offset can be computed.
16080     // 1. Check its type.
16081     EVT PtrType = Origin->getBasePtr().getValueType();
16082     if (PtrType == MVT::Untyped || PtrType.isExtended())
16083       return false;
16084
16085     // 2. Check that it fits in the immediate.
16086     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16087       return false;
16088
16089     // 3. Check that the computation is legal.
16090     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16091       return false;
16092
16093     // Check that the zext is legal if it needs one.
16094     EVT TruncateType = Inst->getValueType(0);
16095     if (TruncateType != SliceType &&
16096         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16097       return false;
16098
16099     return true;
16100   }
16101
16102   /// Get the offset in bytes of this slice in the original chunk of
16103   /// bits.
16104   /// \pre DAG != nullptr.
16105   uint64_t getOffsetFromBase() const {
16106     assert(DAG && "Missing context.");
16107     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16108     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16109     uint64_t Offset = Shift / 8;
16110     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16111     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16112            "The size of the original loaded type is not a multiple of a"
16113            " byte.");
16114     // If Offset is bigger than TySizeInBytes, it means we are loading all
16115     // zeros. This should have been optimized before in the process.
16116     assert(TySizeInBytes > Offset &&
16117            "Invalid shift amount for given loaded size");
16118     if (IsBigEndian)
16119       Offset = TySizeInBytes - Offset - getLoadedSize();
16120     return Offset;
16121   }
16122
16123   /// Generate the sequence of instructions to load the slice
16124   /// represented by this object and redirect the uses of this slice to
16125   /// this new sequence of instructions.
16126   /// \pre this->Inst && this->Origin are valid Instructions and this
16127   /// object passed the legal check: LoadedSlice::isLegal returned true.
16128   /// \return The last instruction of the sequence used to load the slice.
16129   SDValue loadSlice() const {
16130     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16131     const SDValue &OldBaseAddr = Origin->getBasePtr();
16132     SDValue BaseAddr = OldBaseAddr;
16133     // Get the offset in that chunk of bytes w.r.t. the endianness.
16134     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16135     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16136     if (Offset) {
16137       // BaseAddr = BaseAddr + Offset.
16138       EVT ArithType = BaseAddr.getValueType();
16139       SDLoc DL(Origin);
16140       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16141                               DAG->getConstant(Offset, DL, ArithType));
16142     }
16143
16144     // Create the type of the loaded slice according to its size.
16145     EVT SliceType = getLoadedType();
16146
16147     // Create the load for the slice.
16148     SDValue LastInst =
16149         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16150                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16151                      Origin->getMemOperand()->getFlags());
16152     // If the final type is not the same as the loaded type, this means that
16153     // we have to pad with zero. Create a zero extend for that.
16154     EVT FinalType = Inst->getValueType(0);
16155     if (SliceType != FinalType)
16156       LastInst =
16157           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16158     return LastInst;
16159   }
16160
16161   /// Check if this slice can be merged with an expensive cross register
16162   /// bank copy. E.g.,
16163   /// i = load i32
16164   /// f = bitcast i32 i to float
16165   bool canMergeExpensiveCrossRegisterBankCopy() const {
16166     if (!Inst || !Inst->hasOneUse())
16167       return false;
16168     SDNode *Use = *Inst->use_begin();
16169     if (Use->getOpcode() != ISD::BITCAST)
16170       return false;
16171     assert(DAG && "Missing context");
16172     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16173     EVT ResVT = Use->getValueType(0);
16174     const TargetRegisterClass *ResRC =
16175         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16176     const TargetRegisterClass *ArgRC =
16177         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16178                            Use->getOperand(0)->isDivergent());
16179     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16180       return false;
16181
16182     // At this point, we know that we perform a cross-register-bank copy.
16183     // Check if it is expensive.
16184     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16185     // Assume bitcasts are cheap, unless both register classes do not
16186     // explicitly share a common sub class.
16187     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16188       return false;
16189
16190     // Check if it will be merged with the load.
16191     // 1. Check the alignment constraint.
16192     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
16193         ResVT.getTypeForEVT(*DAG->getContext()));
16194
16195     if (RequiredAlignment > getAlign())
16196       return false;
16197
16198     // 2. Check that the load is a legal operation for that type.
16199     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16200       return false;
16201
16202     // 3. Check that we do not have a zext in the way.
16203     if (Inst->getValueType(0) != getLoadedType())
16204       return false;
16205
16206     return true;
16207   }
16208 };
16209
16210 } // end anonymous namespace
16211
16212 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16213 /// \p UsedBits looks like 0..0 1..1 0..0.
16214 static bool areUsedBitsDense(const APInt &UsedBits) {
16215   // If all the bits are one, this is dense!
16216   if (UsedBits.isAllOnesValue())
16217     return true;
16218
16219   // Get rid of the unused bits on the right.
16220   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16221   // Get rid of the unused bits on the left.
16222   if (NarrowedUsedBits.countLeadingZeros())
16223     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16224   // Check that the chunk of bits is completely used.
16225   return NarrowedUsedBits.isAllOnesValue();
16226 }
16227
16228 /// Check whether or not \p First and \p Second are next to each other
16229 /// in memory. This means that there is no hole between the bits loaded
16230 /// by \p First and the bits loaded by \p Second.
16231 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16232                                      const LoadedSlice &Second) {
16233   assert(First.Origin == Second.Origin && First.Origin &&
16234          "Unable to match different memory origins.");
16235   APInt UsedBits = First.getUsedBits();
16236   assert((UsedBits & Second.getUsedBits()) == 0 &&
16237          "Slices are not supposed to overlap.");
16238   UsedBits |= Second.getUsedBits();
16239   return areUsedBitsDense(UsedBits);
16240 }
16241
16242 /// Adjust the \p GlobalLSCost according to the target
16243 /// paring capabilities and the layout of the slices.
16244 /// \pre \p GlobalLSCost should account for at least as many loads as
16245 /// there is in the slices in \p LoadedSlices.
16246 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16247                                  LoadedSlice::Cost &GlobalLSCost) {
16248   unsigned NumberOfSlices = LoadedSlices.size();
16249   // If there is less than 2 elements, no pairing is possible.
16250   if (NumberOfSlices < 2)
16251     return;
16252
16253   // Sort the slices so that elements that are likely to be next to each
16254   // other in memory are next to each other in the list.
16255   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16256     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16257     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16258   });
16259   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16260   // First (resp. Second) is the first (resp. Second) potentially candidate
16261   // to be placed in a paired load.
16262   const LoadedSlice *First = nullptr;
16263   const LoadedSlice *Second = nullptr;
16264   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16265                 // Set the beginning of the pair.
16266                                                            First = Second) {
16267     Second = &LoadedSlices[CurrSlice];
16268
16269     // If First is NULL, it means we start a new pair.
16270     // Get to the next slice.
16271     if (!First)
16272       continue;
16273
16274     EVT LoadedType = First->getLoadedType();
16275
16276     // If the types of the slices are different, we cannot pair them.
16277     if (LoadedType != Second->getLoadedType())
16278       continue;
16279
16280     // Check if the target supplies paired loads for this type.
16281     Align RequiredAlignment;
16282     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16283       // move to the next pair, this type is hopeless.
16284       Second = nullptr;
16285       continue;
16286     }
16287     // Check if we meet the alignment requirement.
16288     if (First->getAlign() < RequiredAlignment)
16289       continue;
16290
16291     // Check that both loads are next to each other in memory.
16292     if (!areSlicesNextToEachOther(*First, *Second))
16293       continue;
16294
16295     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16296     --GlobalLSCost.Loads;
16297     // Move to the next pair.
16298     Second = nullptr;
16299   }
16300 }
16301
16302 /// Check the profitability of all involved LoadedSlice.
16303 /// Currently, it is considered profitable if there is exactly two
16304 /// involved slices (1) which are (2) next to each other in memory, and
16305 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16306 ///
16307 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16308 /// the elements themselves.
16309 ///
16310 /// FIXME: When the cost model will be mature enough, we can relax
16311 /// constraints (1) and (2).
16312 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16313                                 const APInt &UsedBits, bool ForCodeSize) {
16314   unsigned NumberOfSlices = LoadedSlices.size();
16315   if (StressLoadSlicing)
16316     return NumberOfSlices > 1;
16317
16318   // Check (1).
16319   if (NumberOfSlices != 2)
16320     return false;
16321
16322   // Check (2).
16323   if (!areUsedBitsDense(UsedBits))
16324     return false;
16325
16326   // Check (3).
16327   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16328   // The original code has one big load.
16329   OrigCost.Loads = 1;
16330   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16331     const LoadedSlice &LS = LoadedSlices[CurrSlice];
16332     // Accumulate the cost of all the slices.
16333     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16334     GlobalSlicingCost += SliceCost;
16335
16336     // Account as cost in the original configuration the gain obtained
16337     // with the current slices.
16338     OrigCost.addSliceGain(LS);
16339   }
16340
16341   // If the target supports paired load, adjust the cost accordingly.
16342   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16343   return OrigCost > GlobalSlicingCost;
16344 }
16345
16346 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
16347 /// operations, split it in the various pieces being extracted.
16348 ///
16349 /// This sort of thing is introduced by SROA.
16350 /// This slicing takes care not to insert overlapping loads.
16351 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
16352 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16353   if (Level < AfterLegalizeDAG)
16354     return false;
16355
16356   LoadSDNode *LD = cast<LoadSDNode>(N);
16357   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16358       !LD->getValueType(0).isInteger())
16359     return false;
16360
16361   // The algorithm to split up a load of a scalable vector into individual
16362   // elements currently requires knowing the length of the loaded type,
16363   // so will need adjusting to work on scalable vectors.
16364   if (LD->getValueType(0).isScalableVector())
16365     return false;
16366
16367   // Keep track of already used bits to detect overlapping values.
16368   // In that case, we will just abort the transformation.
16369   APInt UsedBits(LD->getValueSizeInBits(0), 0);
16370
16371   SmallVector<LoadedSlice, 4> LoadedSlices;
16372
16373   // Check if this load is used as several smaller chunks of bits.
16374   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16375   // of computation for each trunc.
16376   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16377        UI != UIEnd; ++UI) {
16378     // Skip the uses of the chain.
16379     if (UI.getUse().getResNo() != 0)
16380       continue;
16381
16382     SDNode *User = *UI;
16383     unsigned Shift = 0;
16384
16385     // Check if this is a trunc(lshr).
16386     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16387         isa<ConstantSDNode>(User->getOperand(1))) {
16388       Shift = User->getConstantOperandVal(1);
16389       User = *User->use_begin();
16390     }
16391
16392     // At this point, User is a Truncate, iff we encountered, trunc or
16393     // trunc(lshr).
16394     if (User->getOpcode() != ISD::TRUNCATE)
16395       return false;
16396
16397     // The width of the type must be a power of 2 and greater than 8-bits.
16398     // Otherwise the load cannot be represented in LLVM IR.
16399     // Moreover, if we shifted with a non-8-bits multiple, the slice
16400     // will be across several bytes. We do not support that.
16401     unsigned Width = User->getValueSizeInBits(0);
16402     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16403       return false;
16404
16405     // Build the slice for this chain of computations.
16406     LoadedSlice LS(User, LD, Shift, &DAG);
16407     APInt CurrentUsedBits = LS.getUsedBits();
16408
16409     // Check if this slice overlaps with another.
16410     if ((CurrentUsedBits & UsedBits) != 0)
16411       return false;
16412     // Update the bits used globally.
16413     UsedBits |= CurrentUsedBits;
16414
16415     // Check if the new slice would be legal.
16416     if (!LS.isLegal())
16417       return false;
16418
16419     // Record the slice.
16420     LoadedSlices.push_back(LS);
16421   }
16422
16423   // Abort slicing if it does not seem to be profitable.
16424   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16425     return false;
16426
16427   ++SlicedLoads;
16428
16429   // Rewrite each chain to use an independent load.
16430   // By construction, each chain can be represented by a unique load.
16431
16432   // Prepare the argument for the new token factor for all the slices.
16433   SmallVector<SDValue, 8> ArgChains;
16434   for (const LoadedSlice &LS : LoadedSlices) {
16435     SDValue SliceInst = LS.loadSlice();
16436     CombineTo(LS.Inst, SliceInst, true);
16437     if (SliceInst.getOpcode() != ISD::LOAD)
16438       SliceInst = SliceInst.getOperand(0);
16439     assert(SliceInst->getOpcode() == ISD::LOAD &&
16440            "It takes more than a zext to get to the loaded slice!!");
16441     ArgChains.push_back(SliceInst.getValue(1));
16442   }
16443
16444   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16445                               ArgChains);
16446   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16447   AddToWorklist(Chain.getNode());
16448   return true;
16449 }
16450
16451 /// Check to see if V is (and load (ptr), imm), where the load is having
16452 /// specific bytes cleared out.  If so, return the byte size being masked out
16453 /// and the shift amount.
16454 static std::pair<unsigned, unsigned>
16455 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16456   std::pair<unsigned, unsigned> Result(0, 0);
16457
16458   // Check for the structure we're looking for.
16459   if (V->getOpcode() != ISD::AND ||
16460       !isa<ConstantSDNode>(V->getOperand(1)) ||
16461       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16462     return Result;
16463
16464   // Check the chain and pointer.
16465   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16466   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16467
16468   // This only handles simple types.
16469   if (V.getValueType() != MVT::i16 &&
16470       V.getValueType() != MVT::i32 &&
16471       V.getValueType() != MVT::i64)
16472     return Result;
16473
16474   // Check the constant mask.  Invert it so that the bits being masked out are
16475   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16476   // follow the sign bit for uniformity.
16477   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16478   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16479   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16480   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16481   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16482   if (NotMaskLZ == 64) return Result;  // All zero mask.
16483
16484   // See if we have a continuous run of bits.  If so, we have 0*1+0*
16485   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16486     return Result;
16487
16488   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16489   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16490     NotMaskLZ -= 64-V.getValueSizeInBits();
16491
16492   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16493   switch (MaskedBytes) {
16494   case 1:
16495   case 2:
16496   case 4: break;
16497   default: return Result; // All one mask, or 5-byte mask.
16498   }
16499
16500   // Verify that the first bit starts at a multiple of mask so that the access
16501   // is aligned the same as the access width.
16502   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16503
16504   // For narrowing to be valid, it must be the case that the load the
16505   // immediately preceding memory operation before the store.
16506   if (LD == Chain.getNode())
16507     ; // ok.
16508   else if (Chain->getOpcode() == ISD::TokenFactor &&
16509            SDValue(LD, 1).hasOneUse()) {
16510     // LD has only 1 chain use so they are no indirect dependencies.
16511     if (!LD->isOperandOf(Chain.getNode()))
16512       return Result;
16513   } else
16514     return Result; // Fail.
16515
16516   Result.first = MaskedBytes;
16517   Result.second = NotMaskTZ/8;
16518   return Result;
16519 }
16520
16521 /// Check to see if IVal is something that provides a value as specified by
16522 /// MaskInfo. If so, replace the specified store with a narrower store of
16523 /// truncated IVal.
16524 static SDValue
16525 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16526                                 SDValue IVal, StoreSDNode *St,
16527                                 DAGCombiner *DC) {
16528   unsigned NumBytes = MaskInfo.first;
16529   unsigned ByteShift = MaskInfo.second;
16530   SelectionDAG &DAG = DC->getDAG();
16531
16532   // Check to see if IVal is all zeros in the part being masked in by the 'or'
16533   // that uses this.  If not, this is not a replacement.
16534   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16535                                   ByteShift*8, (ByteShift+NumBytes)*8);
16536   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16537
16538   // Check that it is legal on the target to do this.  It is legal if the new
16539   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16540   // legalization (and the target doesn't explicitly think this is a bad idea).
16541   MVT VT = MVT::getIntegerVT(NumBytes * 8);
16542   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16543   if (!DC->isTypeLegal(VT))
16544     return SDValue();
16545   if (St->getMemOperand() &&
16546       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16547                               *St->getMemOperand()))
16548     return SDValue();
16549
16550   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
16551   // shifted by ByteShift and truncated down to NumBytes.
16552   if (ByteShift) {
16553     SDLoc DL(IVal);
16554     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16555                        DAG.getConstant(ByteShift*8, DL,
16556                                     DC->getShiftAmountTy(IVal.getValueType())));
16557   }
16558
16559   // Figure out the offset for the store and the alignment of the access.
16560   unsigned StOffset;
16561   if (DAG.getDataLayout().isLittleEndian())
16562     StOffset = ByteShift;
16563   else
16564     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16565
16566   SDValue Ptr = St->getBasePtr();
16567   if (StOffset) {
16568     SDLoc DL(IVal);
16569     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16570   }
16571
16572   // Truncate down to the new size.
16573   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16574
16575   ++OpsNarrowed;
16576   return DAG
16577       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16578                 St->getPointerInfo().getWithOffset(StOffset),
16579                 St->getOriginalAlign());
16580 }
16581
16582 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16583 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16584 /// narrowing the load and store if it would end up being a win for performance
16585 /// or code size.
16586 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16587   StoreSDNode *ST  = cast<StoreSDNode>(N);
16588   if (!ST->isSimple())
16589     return SDValue();
16590
16591   SDValue Chain = ST->getChain();
16592   SDValue Value = ST->getValue();
16593   SDValue Ptr   = ST->getBasePtr();
16594   EVT VT = Value.getValueType();
16595
16596   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16597     return SDValue();
16598
16599   unsigned Opc = Value.getOpcode();
16600
16601   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16602   // is a byte mask indicating a consecutive number of bytes, check to see if
16603   // Y is known to provide just those bytes.  If so, we try to replace the
16604   // load + replace + store sequence with a single (narrower) store, which makes
16605   // the load dead.
16606   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16607     std::pair<unsigned, unsigned> MaskedLoad;
16608     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16609     if (MaskedLoad.first)
16610       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16611                                                   Value.getOperand(1), ST,this))
16612         return NewST;
16613
16614     // Or is commutative, so try swapping X and Y.
16615     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16616     if (MaskedLoad.first)
16617       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16618                                                   Value.getOperand(0), ST,this))
16619         return NewST;
16620   }
16621
16622   if (!EnableReduceLoadOpStoreWidth)
16623     return SDValue();
16624
16625   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16626       Value.getOperand(1).getOpcode() != ISD::Constant)
16627     return SDValue();
16628
16629   SDValue N0 = Value.getOperand(0);
16630   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16631       Chain == SDValue(N0.getNode(), 1)) {
16632     LoadSDNode *LD = cast<LoadSDNode>(N0);
16633     if (LD->getBasePtr() != Ptr ||
16634         LD->getPointerInfo().getAddrSpace() !=
16635         ST->getPointerInfo().getAddrSpace())
16636       return SDValue();
16637
16638     // Find the type to narrow it the load / op / store to.
16639     SDValue N1 = Value.getOperand(1);
16640     unsigned BitWidth = N1.getValueSizeInBits();
16641     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16642     if (Opc == ISD::AND)
16643       Imm ^= APInt::getAllOnesValue(BitWidth);
16644     if (Imm == 0 || Imm.isAllOnesValue())
16645       return SDValue();
16646     unsigned ShAmt = Imm.countTrailingZeros();
16647     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16648     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16649     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16650     // The narrowing should be profitable, the load/store operation should be
16651     // legal (or custom) and the store size should be equal to the NewVT width.
16652     while (NewBW < BitWidth &&
16653            (NewVT.getStoreSizeInBits() != NewBW ||
16654             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16655             !TLI.isNarrowingProfitable(VT, NewVT))) {
16656       NewBW = NextPowerOf2(NewBW);
16657       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16658     }
16659     if (NewBW >= BitWidth)
16660       return SDValue();
16661
16662     // If the lsb changed does not start at the type bitwidth boundary,
16663     // start at the previous one.
16664     if (ShAmt % NewBW)
16665       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16666     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16667                                    std::min(BitWidth, ShAmt + NewBW));
16668     if ((Imm & Mask) == Imm) {
16669       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16670       if (Opc == ISD::AND)
16671         NewImm ^= APInt::getAllOnesValue(NewBW);
16672       uint64_t PtrOff = ShAmt / 8;
16673       // For big endian targets, we need to adjust the offset to the pointer to
16674       // load the correct bytes.
16675       if (DAG.getDataLayout().isBigEndian())
16676         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16677
16678       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16679       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16680       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16681         return SDValue();
16682
16683       SDValue NewPtr =
16684           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16685       SDValue NewLD =
16686           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16687                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16688                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
16689       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16690                                    DAG.getConstant(NewImm, SDLoc(Value),
16691                                                    NewVT));
16692       SDValue NewST =
16693           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16694                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16695
16696       AddToWorklist(NewPtr.getNode());
16697       AddToWorklist(NewLD.getNode());
16698       AddToWorklist(NewVal.getNode());
16699       WorklistRemover DeadNodes(*this);
16700       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16701       ++OpsNarrowed;
16702       return NewST;
16703     }
16704   }
16705
16706   return SDValue();
16707 }
16708
16709 /// For a given floating point load / store pair, if the load value isn't used
16710 /// by any other operations, then consider transforming the pair to integer
16711 /// load / store operations if the target deems the transformation profitable.
16712 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16713   StoreSDNode *ST  = cast<StoreSDNode>(N);
16714   SDValue Value = ST->getValue();
16715   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16716       Value.hasOneUse()) {
16717     LoadSDNode *LD = cast<LoadSDNode>(Value);
16718     EVT VT = LD->getMemoryVT();
16719     if (!VT.isFloatingPoint() ||
16720         VT != ST->getMemoryVT() ||
16721         LD->isNonTemporal() ||
16722         ST->isNonTemporal() ||
16723         LD->getPointerInfo().getAddrSpace() != 0 ||
16724         ST->getPointerInfo().getAddrSpace() != 0)
16725       return SDValue();
16726
16727     TypeSize VTSize = VT.getSizeInBits();
16728
16729     // We don't know the size of scalable types at compile time so we cannot
16730     // create an integer of the equivalent size.
16731     if (VTSize.isScalable())
16732       return SDValue();
16733
16734     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16735     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16736         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16737         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16738         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16739       return SDValue();
16740
16741     Align LDAlign = LD->getAlign();
16742     Align STAlign = ST->getAlign();
16743     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16744     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16745     if (LDAlign < ABIAlign || STAlign < ABIAlign)
16746       return SDValue();
16747
16748     SDValue NewLD =
16749         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16750                     LD->getPointerInfo(), LDAlign);
16751
16752     SDValue NewST =
16753         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16754                      ST->getPointerInfo(), STAlign);
16755
16756     AddToWorklist(NewLD.getNode());
16757     AddToWorklist(NewST.getNode());
16758     WorklistRemover DeadNodes(*this);
16759     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16760     ++LdStFP2Int;
16761     return NewST;
16762   }
16763
16764   return SDValue();
16765 }
16766
16767 // This is a helper function for visitMUL to check the profitability
16768 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16769 // MulNode is the original multiply, AddNode is (add x, c1),
16770 // and ConstNode is c2.
16771 //
16772 // If the (add x, c1) has multiple uses, we could increase
16773 // the number of adds if we make this transformation.
16774 // It would only be worth doing this if we can remove a
16775 // multiply in the process. Check for that here.
16776 // To illustrate:
16777 //     (A + c1) * c3
16778 //     (A + c2) * c3
16779 // We're checking for cases where we have common "c3 * A" expressions.
16780 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16781                                               SDValue &AddNode,
16782                                               SDValue &ConstNode) {
16783   APInt Val;
16784
16785   // If the add only has one use, this would be OK to do.
16786   if (AddNode.getNode()->hasOneUse())
16787     return true;
16788
16789   // Walk all the users of the constant with which we're multiplying.
16790   for (SDNode *Use : ConstNode->uses()) {
16791     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16792       continue;
16793
16794     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16795       SDNode *OtherOp;
16796       SDNode *MulVar = AddNode.getOperand(0).getNode();
16797
16798       // OtherOp is what we're multiplying against the constant.
16799       if (Use->getOperand(0) == ConstNode)
16800         OtherOp = Use->getOperand(1).getNode();
16801       else
16802         OtherOp = Use->getOperand(0).getNode();
16803
16804       // Check to see if multiply is with the same operand of our "add".
16805       //
16806       //     ConstNode  = CONST
16807       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16808       //     ...
16809       //     AddNode  = (A + c1)  <-- MulVar is A.
16810       //         = AddNode * ConstNode   <-- current visiting instruction.
16811       //
16812       // If we make this transformation, we will have a common
16813       // multiply (ConstNode * A) that we can save.
16814       if (OtherOp == MulVar)
16815         return true;
16816
16817       // Now check to see if a future expansion will give us a common
16818       // multiply.
16819       //
16820       //     ConstNode  = CONST
16821       //     AddNode    = (A + c1)
16822       //     ...   = AddNode * ConstNode <-- current visiting instruction.
16823       //     ...
16824       //     OtherOp = (A + c2)
16825       //     Use     = OtherOp * ConstNode <-- visiting Use.
16826       //
16827       // If we make this transformation, we will have a common
16828       // multiply (CONST * A) after we also do the same transformation
16829       // to the "t2" instruction.
16830       if (OtherOp->getOpcode() == ISD::ADD &&
16831           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16832           OtherOp->getOperand(0).getNode() == MulVar)
16833         return true;
16834     }
16835   }
16836
16837   // Didn't find a case where this would be profitable.
16838   return false;
16839 }
16840
16841 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16842                                          unsigned NumStores) {
16843   SmallVector<SDValue, 8> Chains;
16844   SmallPtrSet<const SDNode *, 8> Visited;
16845   SDLoc StoreDL(StoreNodes[0].MemNode);
16846
16847   for (unsigned i = 0; i < NumStores; ++i) {
16848     Visited.insert(StoreNodes[i].MemNode);
16849   }
16850
16851   // don't include nodes that are children or repeated nodes.
16852   for (unsigned i = 0; i < NumStores; ++i) {
16853     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16854       Chains.push_back(StoreNodes[i].MemNode->getChain());
16855   }
16856
16857   assert(Chains.size() > 0 && "Chain should have generated a chain");
16858   return DAG.getTokenFactor(StoreDL, Chains);
16859 }
16860
/// Replace the stores held in the first \p NumStores entries of \p StoreNodes
/// with a single store of their combined value.
///
/// \param StoreNodes Candidate stores; entry 0 supplies the base pointer,
///        pointer info and alignment of the merged store.
/// \param MemVT Type whose store size gives the width of each merged element.
/// \param NumStores Number of leading entries of \p StoreNodes to merge.
/// \param IsConstantSrc True if the stored values are constants; the
///        non-vector path asserts this.
/// \param UseVector True to store a vector (BUILD_VECTOR / CONCAT_VECTORS),
///        false to store one wide integer constant.
/// \param UseTrunc True to emit a truncating store of the constant extended
///        to the type returned by TLI.getTypeToTransformTo; must not be
///        combined with \p UseVector (asserted).
/// \returns false if \p NumStores is below two or if a floating point constant
///          would have to be resized; true once the stores were replaced.
16861 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16862     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16863     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16864   // Make sure we have something to merge.
16865   if (NumStores < 2)
16866     return false;
16867
16868   assert((!UseTrunc || !UseVector) &&
16869          "This optimization cannot emit a vector truncating store");
16870
16871   // Debug location for the new nodes, taken from the first candidate store.
16872   SDLoc DL(StoreNodes[0].MemNode);
16873
        // Width of one merged element (the store size of MemVT) and of the whole
        // merged value.
16874   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16875   unsigned SizeInBits = NumStores * ElementSizeBits;
16876   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16877
16878   EVT StoreTy;
16879   if (UseVector) {
16880     unsigned Elts = NumStores * NumMemElts;
16881     // Get the type for the merged vector store.
16882     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16883   } else
16884     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16885
16886   SDValue StoredVal;
16887   if (UseVector) {
16888     if (IsConstantSrc) {
16889       SmallVector<SDValue, 8> BuildVector;
16890       for (unsigned I = 0; I != NumStores; ++I) {
16891         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16892         SDValue Val = St->getValue();
16893         // If constant is of the wrong type, convert it now.
16894         if (MemVT != Val.getValueType()) {
16895           Val = peekThroughBitcasts(Val);
16896           // Deal with constants of wrong size.
16897           if (ElementSizeBits != Val.getValueSizeInBits()) {
16898             EVT IntMemVT =
16899                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16900             if (isa<ConstantFPSDNode>(Val)) {
16901               // Not clear how to truncate FP values.
16902               return false;
16903             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16904               Val = DAG.getConstant(C->getAPIntValue()
16905                                         .zextOrTrunc(Val.getValueSizeInBits())
16906                                         .zextOrTrunc(ElementSizeBits),
16907                                     SDLoc(C), IntMemVT);
16908           }
16909           // Make sure the correctly sized value also has the correct type.
16910           Val = DAG.getBitcast(MemVT, Val);
16911         }
16912         BuildVector.push_back(Val);
16913       }
16914       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16915                                                : ISD::BUILD_VECTOR,
16916                               DL, StoreTy, BuildVector);
16917     } else {
16918       SmallVector<SDValue, 8> Ops;
16919       for (unsigned i = 0; i < NumStores; ++i) {
16920         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16921         SDValue Val = peekThroughBitcasts(St->getValue());
16922         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16923         // type MemVT. If the underlying value is not the correct
16924         // type, but it is an extraction of an appropriate vector we
16925         // can recast Val to be of the correct type. This may require
16926         // converting between EXTRACT_VECTOR_ELT and
16927         // EXTRACT_SUBVECTOR.
16928         if ((MemVT != Val.getValueType()) &&
16929             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
16930              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16931           EVT MemVTScalarTy = MemVT.getScalarType();
16932           // We may need to add a bitcast here to get types to line up.
16933           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16934             Val = DAG.getBitcast(MemVT, Val);
16935           } else {
                    // Same scalar type: re-extract from the same source vector and
                    // index, this time producing MemVT.
16936             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16937                                             : ISD::EXTRACT_VECTOR_ELT;
16938             SDValue Vec = Val.getOperand(0);
16939             SDValue Idx = Val.getOperand(1);
16940             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16941           }
16942         }
16943         Ops.push_back(Val);
16944       }
16945
16946       // Build the extracted vector elements back into a vector.
16947       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16948                                                : ISD::BUILD_VECTOR,
16949                               DL, StoreTy, Ops);
16950     }
16951   } else {
16952     // We should always use a vector store when merging extracted vector
16953     // elements, so this path implies a store of constants.
16954     assert(IsConstantSrc && "Merged vector elements should use vector store");
16955
16956     APInt StoreInt(SizeInBits, 0);
16957
16958     // Construct a single integer constant which is made of the smaller
16959     // constant inputs.
          // Each iteration shifts the accumulated value left and ORs the next
          // constant into the low bits. On little-endian targets the stores are
          // visited last-to-first, so StoreNodes[0] ends up in the lowest bits.
16960     bool IsLE = DAG.getDataLayout().isLittleEndian();
16961     for (unsigned i = 0; i < NumStores; ++i) {
16962       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16963       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16964
16965       SDValue Val = St->getValue();
16966       Val = peekThroughBitcasts(Val);
16967       StoreInt <<= ElementSizeBits;
16968       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16969         StoreInt |= C->getAPIntValue()
16970                         .zextOrTrunc(ElementSizeBits)
16971                         .zextOrTrunc(SizeInBits);
16972       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16973         StoreInt |= C->getValueAPF()
16974                         .bitcastToAPInt()
16975                         .zextOrTrunc(ElementSizeBits)
16976                         .zextOrTrunc(SizeInBits);
16977         // If fp truncation is necessary give up for now.
16978         if (MemVT.getSizeInBits() != ElementSizeBits)
16979           return false;
16980       } else {
16981         llvm_unreachable("Invalid constant element type");
16982       }
16983     }
16984
16985     // Materialize the combined integer constant that will be stored.
16986     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16987   }
16988
16989   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16990   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16991
16992   // make sure we use trunc store if it's necessary to be legal.
        // NOTE(review): only the truncating path forwards the first store's
        // memory-operand flags; the plain store below passes none. Confirm that
        // this asymmetry is intended.
16993   SDValue NewStore;
16994   if (!UseTrunc) {
16995     NewStore =
16996         DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16997                      FirstInChain->getPointerInfo(), FirstInChain->getAlign());
16998   } else { // Must be realized as a trunc store
16999     EVT LegalizedStoredValTy =
17000         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17001     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17002     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17003     SDValue ExtendedStoreVal =
17004         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17005                         LegalizedStoredValTy);
17006     NewStore = DAG.getTruncStore(
17007         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17008         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17009         FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17010   }
17011
17012   // Replace all merged stores with the new store.
17013   for (unsigned i = 0; i < NumStores; ++i)
17014     CombineTo(StoreNodes[i].MemNode, NewStore);
17015
17016   AddToWorklist(NewChain.getNode());
17017   return true;
17018 }
17019
/// Collect into \p StoreNodes the stores that are candidates for being merged
/// with \p St, and set \p RootNode to the chain node the search started from.
///
/// A candidate is a StoreSDNode chain user found by the search below that
/// passes the CandidateMatch checks; it is recorded together with its offset
/// relative to \p St's base pointer. The function returns early, without
/// touching \p StoreNodes or \p RootNode, when \p St has no usable base
/// pointer or when its load source fails the load checks.
17020 void DAGCombiner::getStoreMergeCandidates(
17021     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17022     SDNode *&RootNode) {
17023   // This holds the base pointer, index, and the offset in bytes from the base
17024   // pointer. We must have a base and an offset. Do not handle stores to undef
17025   // base pointers.
17026   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17027   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17028     return;
17029
17030   SDValue Val = peekThroughBitcasts(St->getValue());
17031   StoreSource StoreSrc = getStoreSource(Val);
17032   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17033
17034   // Match on loadbaseptr if relevant.
17035   EVT MemVT = St->getMemoryVT();
17036   BaseIndexOffset LBasePtr;
17037   EVT LoadVT;
17038   if (StoreSrc == StoreSource::Load) {
17039     auto *Ld = cast<LoadSDNode>(Val);
17040     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17041     LoadVT = Ld->getMemoryVT();
17042     // Load and store should be the same type.
17043     if (MemVT != LoadVT)
17044       return;
17045     // Loads must only have one use.
17046     if (!Ld->hasNUsesOfValue(1, 0))
17047       return;
17048     // The memory operands must not be volatile/indexed/atomic.
17049     // TODO: May be able to relax for unordered atomics (see D66309)
17050     if (!Ld->isSimple() || Ld->isIndexed())
17051       return;
17052   }
        // Returns true if Other is compatible with St for merging. On a true
        // result Ptr holds Other's base/index match and Offset its distance from
        // St's base pointer, as reported by equalBaseIndex.
17053   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17054                             int64_t &Offset) -> bool {
17055     // The memory operands must not be volatile/indexed/atomic.
17056     // TODO: May be able to relax for unordered atomics (see D66309)
17057     if (!Other->isSimple() || Other->isIndexed())
17058       return false;
17059     // Don't mix temporal stores with non-temporal stores.
17060     if (St->isNonTemporal() != Other->isNonTemporal())
17061       return false;
17062     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17063     // Allow merging constants of different types as integers.
17064     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17065                                            : Other->getMemoryVT() != MemVT;
17066     switch (StoreSrc) {
17067     case StoreSource::Load: {
17068       if (NoTypeMatch)
17069         return false;
17070       // The Load's Base Ptr must also match.
17071       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17072       if (!OtherLd)
17073         return false;
17074       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17075       if (LoadVT != OtherLd->getMemoryVT())
17076         return false;
17077       // Loads must only have one use.
17078       if (!OtherLd->hasNUsesOfValue(1, 0))
17079         return false;
17080       // The memory operands must not be volatile/indexed/atomic.
17081       // TODO: May be able to relax for unordered atomics (see D66309)
17082       if (!OtherLd->isSimple() || OtherLd->isIndexed())
17083         return false;
17084       // Don't mix temporal loads with non-temporal loads.
17085       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17086         return false;
17087       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17088         return false;
17089       break;
17090     }
17091     case StoreSource::Constant:
17092       if (NoTypeMatch)
17093         return false;
17094       if (!isIntOrFPConstant(OtherBC))
17095         return false;
17096       break;
17097     case StoreSource::Extract:
17098       // Do not merge truncated stores here.
17099       if (Other->isTruncatingStore())
17100         return false;
17101       if (!MemVT.bitsEq(OtherBC.getValueType()))
17102         return false;
17103       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17104           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17105         return false;
17106       break;
17107     default:
17108       llvm_unreachable("Unhandled store source for merging");
17109     }
17110     Ptr = BaseIndexOffset::match(Other, DAG);
17111     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17112   };
17113
17114   // Check whether the pair of StoreNode and RootNode has already bailed out
17115   // of the dependence check more often than StoreMergeDependenceLimit allows.
17116   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17117                                         SDNode *RootNode) -> bool {
17118     auto RootCount = StoreRootCountMap.find(StoreNode);
17119     return RootCount != StoreRootCountMap.end() &&
17120            RootCount->second.first == RootNode &&
17121            RootCount->second.second > StoreMergeDependenceLimit;
17122   };
17123
        // Appends the user at UseIter to StoreNodes if it is a store that uses the
        // visited node as its chain (operand 0), matches St, and is not over the
        // dependence-check limit for the current RootNode.
17124   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17125     // This must be a chain use.
17126     if (UseIter.getOperandNo() != 0)
17127       return;
17128     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17129       BaseIndexOffset Ptr;
17130       int64_t PtrDiff;
17131       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17132           !OverLimitInDependenceCheck(OtherStore, RootNode))
17133         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17134     }
17135   };
17136
17137   // We are looking for a root node which is an ancestor to all mergeable
17138   // stores. We search up through a load, to our root and then down
17139   // through all children. For instance we will find Store{1,2,3} if
17140   // St is Store1, Store2, or Store3 where the root is not a load,
17141   // which is always true for nonvolatile ops. TODO: Expand
17142   // the search to find all valid candidates through multiple layers of loads.
17143   //
17144   // Root
17145   // |-------|-------|
17146   // Load    Load    Store3
17147   // |       |
17148   // Store1   Store2
17149   //
17150   // FIXME: We should be able to climb and
17151   // descend TokenFactors to find candidates as well.
        //
        // NOTE(review): as written, when St's chain is a load only the stores
        // chained to loads hanging off the root are visited (Store1 / Store2 in
        // the chart), and otherwise only the stores chained directly to St's
        // chain node are visited. Confirm whether the Store{1,2,3} claim above
        // is still accurate.
17152
17153   RootNode = St->getChain().getNode();
17154
        // NumNodesExplored counts the uses of the root that were visited; the
        // walk stops once MaxSearchNodes of them have been seen.
17155   unsigned NumNodesExplored = 0;
17156   const unsigned MaxSearchNodes = 1024;
17157   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17158     RootNode = Ldn->getChain().getNode();
17159     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17160          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17161       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17162         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17163           TryToAddCandidate(I2);
17164       }
17165     }
17166   } else {
17167     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17168          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17169       TryToAddCandidate(I);
17170   }
17171 }
17172
17173 // We need to check that merging these stores does not cause a loop in
17174 // the DAG. Any store candidate may depend on another candidate
17175 // indirectly through its operand (we already consider dependencies
17176 // through the chain). Check in parallel by searching up from
17177 // non-chain operands of candidates.
17178 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17179     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17180     SDNode *RootNode) {
17181   // FIXME: We should be able to truncate a full search of
17182   // predecessors by doing a BFS and keeping tabs the originating
17183   // stores from which worklist nodes come from in a similar way to
17184   // TokenFactor simplfication.
17185
17186   SmallPtrSet<const SDNode *, 32> Visited;
17187   SmallVector<const SDNode *, 8> Worklist;
17188
17189   // RootNode is a predecessor to all candidates so we need not search
17190   // past it. Add RootNode (peeking through TokenFactors). Do not count
17191   // these towards size check.
17192
17193   Worklist.push_back(RootNode);
17194   while (!Worklist.empty()) {
17195     auto N = Worklist.pop_back_val();
17196     if (!Visited.insert(N).second)
17197       continue; // Already present in Visited.
17198     if (N->getOpcode() == ISD::TokenFactor) {
17199       for (SDValue Op : N->ops())
17200         Worklist.push_back(Op.getNode());
17201     }
17202   }
17203
17204   // Don't count pruning nodes towards max.
17205   unsigned int Max = 1024 + Visited.size();
17206   // Search Ops of store candidates.
17207   for (unsigned i = 0; i < NumStores; ++i) {
17208     SDNode *N = StoreNodes[i].MemNode;
17209     // Of the 4 Store Operands:
17210     //   * Chain (Op 0) -> We have already considered these
17211     //                    in candidate selection and can be
17212     //                    safely ignored
17213     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17214     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17215     //                       but aren't necessarily fromt the same base node, so
17216     //                       cycles possible (e.g. via indexed store).
17217     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17218     //               non-indexed stores). Not constant on all targets (e.g. ARM)
17219     //               and so can participate in a cycle.
17220     for (unsigned j = 1; j < N->getNumOperands(); ++j)
17221       Worklist.push_back(N->getOperand(j).getNode());
17222   }
17223   // Search through DAG. We can stop early if we find a store node.
17224   for (unsigned i = 0; i < NumStores; ++i)
17225     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17226                                      Max)) {
17227       // If the searching bail out, record the StoreNode and RootNode in the
17228       // StoreRootCountMap. If we have seen the pair many times over a limit,
17229       // we won't add the StoreNode into StoreNodes set again.
17230       if (Visited.size() >= Max) {
17231         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17232         if (RootCount.first == RootNode)
17233           RootCount.second++;
17234         else
17235           RootCount = {RootNode, 1};
17236       }
17237       return false;
17238     }
17239   return true;
17240 }
17241
17242 unsigned
17243 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17244                                   int64_t ElementSizeBytes) const {
17245   while (true) {
17246     // Find a store past the width of the first store.
17247     size_t StartIdx = 0;
17248     while ((StartIdx + 1 < StoreNodes.size()) &&
17249            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17250               StoreNodes[StartIdx + 1].OffsetFromBase)
17251       ++StartIdx;
17252
17253     // Bail if we don't have enough candidates to merge.
17254     if (StartIdx + 1 >= StoreNodes.size())
17255       return 0;
17256
17257     // Trim stores that overlapped with the first store.
17258     if (StartIdx)
17259       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17260
17261     // Scan the memory operations on the chain and find the first
17262     // non-consecutive store memory address.
17263     unsigned NumConsecutiveStores = 1;
17264     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17265     // Check that the addresses are consecutive starting from the second
17266     // element in the list of stores.
17267     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17268       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17269       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17270         break;
17271       NumConsecutiveStores = i + 1;
17272     }
17273     if (NumConsecutiveStores > 1)
17274       return NumConsecutiveStores;
17275
17276     // There are no consecutive stores at the start of the list.
17277     // Remove the first store and try again.
17278     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17279   }
17280 }
17281
17282 bool DAGCombiner::tryStoreMergeOfConstants(
17283     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17284     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17285   LLVMContext &Context = *DAG.getContext();
17286   const DataLayout &DL = DAG.getDataLayout();
17287   int64_t ElementSizeBytes = MemVT.getStoreSize();
17288   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17289   bool MadeChange = false;
17290
17291   // Store the constants into memory as one consecutive store.
17292   while (NumConsecutiveStores >= 2) {
17293     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17294     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17295     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17296     unsigned LastLegalType = 1;
17297     unsigned LastLegalVectorType = 1;
17298     bool LastIntegerTrunc = false;
17299     bool NonZero = false;
17300     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17301     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17302       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17303       SDValue StoredVal = ST->getValue();
17304       bool IsElementZero = false;
17305       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17306         IsElementZero = C->isNullValue();
17307       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17308         IsElementZero = C->getConstantFPValue()->isNullValue();
17309       if (IsElementZero) {
17310         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17311           FirstZeroAfterNonZero = i;
17312       }
17313       NonZero |= !IsElementZero;
17314
17315       // Find a legal type for the constant store.
17316       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17317       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17318       bool IsFast = false;
17319
17320       // Break early when size is too large to be legal.
17321       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17322         break;
17323
17324       if (TLI.isTypeLegal(StoreTy) &&
17325           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17326           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17327                                  *FirstInChain->getMemOperand(), &IsFast) &&
17328           IsFast) {
17329         LastIntegerTrunc = false;
17330         LastLegalType = i + 1;
17331         // Or check whether a truncstore is legal.
17332       } else if (TLI.getTypeAction(Context, StoreTy) ==
17333                  TargetLowering::TypePromoteInteger) {
17334         EVT LegalizedStoredValTy =
17335             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17336         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17337             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17338             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17339                                    *FirstInChain->getMemOperand(), &IsFast) &&
17340             IsFast) {
17341           LastIntegerTrunc = true;
17342           LastLegalType = i + 1;
17343         }
17344       }
17345
17346       // We only use vectors if the constant is known to be zero or the
17347       // target allows it and the function is not marked with the
17348       // noimplicitfloat attribute.
17349       if ((!NonZero ||
17350            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17351           AllowVectors) {
17352         // Find a legal type for the vector store.
17353         unsigned Elts = (i + 1) * NumMemElts;
17354         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17355         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17356             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17357             TLI.allowsMemoryAccess(Context, DL, Ty,
17358                                    *FirstInChain->getMemOperand(), &IsFast) &&
17359             IsFast)
17360           LastLegalVectorType = i + 1;
17361       }
17362     }
17363
17364     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17365     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17366     bool UseTrunc = LastIntegerTrunc && !UseVector;
17367
17368     // Check if we found a legal integer type that creates a meaningful
17369     // merge.
17370     if (NumElem < 2) {
17371       // We know that candidate stores are in order and of correct
17372       // shape. While there is no mergeable sequence from the
17373       // beginning one may start later in the sequence. The only
17374       // reason a merge of size N could have failed where another of
17375       // the same size would not have, is if the alignment has
17376       // improved or we've dropped a non-zero value. Drop as many
17377       // candidates as we can here.
17378       unsigned NumSkip = 1;
17379       while ((NumSkip < NumConsecutiveStores) &&
17380              (NumSkip < FirstZeroAfterNonZero) &&
17381              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17382         NumSkip++;
17383
17384       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17385       NumConsecutiveStores -= NumSkip;
17386       continue;
17387     }
17388
17389     // Check that we can merge these candidates without causing a cycle.
17390     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17391                                                   RootNode)) {
17392       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17393       NumConsecutiveStores -= NumElem;
17394       continue;
17395     }
17396
17397     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17398                                                   /*IsConstantSrc*/ true,
17399                                                   UseVector, UseTrunc);
17400
17401     // Remove merged stores for next iteration.
17402     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17403     NumConsecutiveStores -= NumElem;
17404   }
17405   return MadeChange;
17406 }
17407
/// Try to merge runs of consecutive candidate stores into single wider vector
/// stores. mergeConsecutiveStores dispatches here for StoreSource::Extract.
///
/// Each pass looks for the largest prefix length N of the candidates such that
/// a vector of N * NumMemElts elements of MemVT's scalar type is a legal type,
/// is accepted by TLI.canMergeStoresTo, and is reported as an allowed and fast
/// memory access for the first store's memory operand. A prefix of at least
/// two stores that also passes checkMergeStoreCandidatesForDependencies is
/// handed to mergeStoresOfConstantsOrVecElts (IsConstantSrc = false,
/// UseVector = true, UseTrunc = false).
///
/// \param StoreNodes candidate stores; every entry examined or merged here is
///        erased from the front of the vector.
/// \param NumConsecutiveStores number of leading entries of \p StoreNodes to
///        consider.
/// \param MemVT memory type of each individual candidate store.
/// \param RootNode forwarded to checkMergeStoreCandidatesForDependencies.
/// \returns true if any call to mergeStoresOfConstantsOrVecElts reported a
///          change.
17408 bool DAGCombiner::tryStoreMergeOfExtracts(
17409     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17410     EVT MemVT, SDNode *RootNode) {
17411   LLVMContext &Context = *DAG.getContext();
17412   const DataLayout &DL = DAG.getDataLayout();
        // A scalar MemVT contributes one element per store.
17413   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17414   bool MadeChange = false;
17415
17416   // Loop on Consecutive Stores on success.
17417   while (NumConsecutiveStores >= 2) {
17418     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17419     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17420     unsigned FirstStoreAlign = FirstInChain->getAlignment();
          // Number of leading stores (a count, not an index) covered by the
          // widest acceptable vector type found so far.
17421     unsigned NumStoresToMerge = 1;
17422     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17423       // Find a legal type for the vector store.
17424       unsigned Elts = (i + 1) * NumMemElts;
17425       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17426       bool IsFast = false;
17427
17428       // Break early when size is too large to be legal.
17429       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17430         break;
17431
            // Keep scanning after a failure: a larger width may still be
            // acceptable even if this one is not.
17432       if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17433           TLI.allowsMemoryAccess(Context, DL, Ty,
17434                                  *FirstInChain->getMemOperand(), &IsFast) &&
17435           IsFast)
17436         NumStoresToMerge = i + 1;
17437     }
17438
17439     // Check if we found a legal vector type creating a meaningful
17440     // merge.
17441     if (NumStoresToMerge < 2) {
17442       // We know that candidate stores are in order and of correct
17443       // shape. While there is no mergeable sequence from the
17444       // beginning one may start later in the sequence. The only
17445       // reason a merge of size N could have failed where another of
17446       // the same size would not have, is if the alignment has
17447       // improved. Drop as many candidates as we can here.
17448       unsigned NumSkip = 1;
17449       while ((NumSkip < NumConsecutiveStores) &&
17450              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17451         NumSkip++;
17452
17453       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17454       NumConsecutiveStores -= NumSkip;
17455       continue;
17456     }
17457
17458     // Check that we can merge these candidates without causing a cycle.
17459     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17460                                                   RootNode)) {
            // The rejected prefix is dropped as a whole; the remaining
            // candidates are retried on the next iteration.
17461       StoreNodes.erase(StoreNodes.begin(),
17462                        StoreNodes.begin() + NumStoresToMerge);
17463       NumConsecutiveStores -= NumStoresToMerge;
17464       continue;
17465     }
17466
17467     MadeChange |= mergeStoresOfConstantsOrVecElts(
17468         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17469         /*UseVector*/ true, /*UseTrunc*/ false);
17470
          // Remove the stores just handled so the next iteration starts at the
          // first remaining candidate.
17471     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17472     NumConsecutiveStores -= NumStoresToMerge;
17473   }
17474   return MadeChange;
17475 }
17476
/// Try to merge consecutive stores whose stored values are loads into one wide
/// load feeding one wide store. mergeConsecutiveStores dispatches here for
/// StoreSource::Load.
///
/// The loads behind the candidate stores are collected first (stopping at the
/// first load whose base/index does not match the first load's). Then, for the
/// leading candidates, the widest vector and/or integer type is searched for
/// that is acceptable for both the first store's and the first load's memory
/// operand. On success a single load (or extending load) and a single store
/// (or truncating store) replace the individual operations.
///
/// \param StoreNodes candidate stores; entries that are merged, rejected or
///        skipped are erased from the front of the vector.
/// \param NumConsecutiveStores number of leading entries of \p StoreNodes to
///        consider.
/// \param MemVT memory type of each individual candidate store.
/// \param RootNode forwarded to checkMergeStoreCandidatesForDependencies.
/// \param AllowVectors whether a vector type may be chosen for the merged
///        load/store.
/// \param IsNonTemporalStore adds MONonTemporal to the merged store's flags on
///        the plain (non-truncating) store path.
/// \param IsNonTemporalLoad adds MONonTemporal to the merged load's flags.
/// \returns true if at least one group of loads/stores was merged.
17477 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17478                                        unsigned NumConsecutiveStores, EVT MemVT,
17479                                        SDNode *RootNode, bool AllowVectors,
17480                                        bool IsNonTemporalStore,
17481                                        bool IsNonTemporalLoad) {
17482   LLVMContext &Context = *DAG.getContext();
17483   const DataLayout &DL = DAG.getDataLayout();
17484   int64_t ElementSizeBytes = MemVT.getStoreSize();
        // A scalar MemVT contributes one element per store.
17485   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17486   bool MadeChange = false;
17487
17488   // Look for load nodes which are used by the stored values.
17489   SmallVector<MemOpLink, 8> LoadNodes;
17490
17491   // Find acceptable loads. Loads need to have the same chain (token factor),
17492   // must not be zext, volatile, indexed, and they must be consecutive.
17493   BaseIndexOffset LdBasePtr;
17494
17495   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17496     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17497     SDValue Val = peekThroughBitcasts(St->getValue());
          // NOTE(review): cast<> requires every candidate's value to be a load;
          // presumably candidate collection guarantees this - confirm there.
17498     LoadSDNode *Ld = cast<LoadSDNode>(Val);
17499
17500     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17501     // If this is not the first ptr that we check.
17502     int64_t LdOffset = 0;
17503     if (LdBasePtr.getBase().getNode()) {
17504       // The base ptr must be the same.
            // On a mismatch LoadNodes ends up shorter than NumConsecutiveStores.
17505       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17506         break;
17507     } else {
17508       // Check that all other base pointers are the same as this one.
17509       LdBasePtr = LdPtr;
17510     }
17511
17512     // We found a potential memory operand to merge.
17513     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17514   }
17515
        // StoreNodes and LoadNodes are consumed in lock-step from the front, so
        // index k in one corresponds to index k in the other.
17516   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17517     Align RequiredAlignment;
17518     bool NeedRotate = false;
17519     if (LoadNodes.size() == 2) {
17520       // If we have load/store pair instructions and we only have two values,
17521       // don't bother merging.
17522       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17523           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
              // Drop the pair and leave the loop; NumConsecutiveStores is not
              // adjusted because it is not read again after the break.
17524         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17525         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17526         break;
17527       }
17528       // If the loads are reversed, see if we can rotate the halves into place.
17529       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17530       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17531       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
            // The first store's load sits exactly one element above the second
            // store's load. Swap so LoadNodes[0] is the lower address; the
            // merged value is rotated by half its width before being stored.
17532       if (Offset0 - Offset1 == ElementSizeBytes &&
17533           (hasOperation(ISD::ROTL, PairVT) ||
17534            hasOperation(ISD::ROTR, PairVT))) {
17535         std::swap(LoadNodes[0], LoadNodes[1]);
17536         NeedRotate = true;
17537       }
17538     }
17539     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17540     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17541     Align FirstStoreAlign = FirstInChain->getAlign();
17542     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17543
17544     // Scan the memory operations on the chain and find the first
17545     // non-consecutive load memory address. These variables hold the index in
17546     // the store node array.
17547
17548     unsigned LastConsecutiveLoad = 1;
17549
17550     // This variable refers to the size and not index in the array.
17551     unsigned LastLegalVectorType = 1;
17552     unsigned LastLegalIntegerType = 1;
17553     bool isDereferenceable = true;
17554     bool DoIntegerTruncate = false;
17555     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17556     SDValue LoadChain = FirstLoad->getChain();
17557     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17558       // All loads must share the same chain.
17559       if (LoadNodes[i].MemNode->getChain() != LoadChain)
17560         break;
17561
            // Load i must sit exactly i elements after the first load.
17562       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17563       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17564         break;
17565       LastConsecutiveLoad = i;
17566
            // The merged load is only marked dereferenceable if every load
            // scanned here is; the first load itself is not checked in this
            // loop.
17567       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17568         isDereferenceable = false;
17569
17570       // Find a legal type for the vector store.
17571       unsigned Elts = (i + 1) * NumMemElts;
17572       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17573
17574       // Break early when size is too large to be legal.
17575       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17576         break;
17577
            // The candidate type must be acceptable and fast for both the
            // first store's and the first load's memory operand.
17578       bool IsFastSt = false;
17579       bool IsFastLd = false;
17580       if (TLI.isTypeLegal(StoreTy) &&
17581           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17582           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17583                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17584           IsFastSt &&
17585           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17586                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17587           IsFastLd) {
17588         LastLegalVectorType = i + 1;
17589       }
17590
17591       // Find a legal type for the integer store.
17592       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17593       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17594       if (TLI.isTypeLegal(StoreTy) &&
17595           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17596           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17597                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17598           IsFastSt &&
17599           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17600                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17601           IsFastLd) {
17602         LastLegalIntegerType = i + 1;
17603         DoIntegerTruncate = false;
17604         // Or check whether a truncstore and extload is legal.
17605       } else if (TLI.getTypeAction(Context, StoreTy) ==
17606                  TargetLowering::TypePromoteInteger) {
17607         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
              // All three extending-load kinds must be legal, together with the
              // truncating store, before the promoted type is accepted.
17608         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17609             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17610             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17611             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17612             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17613             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17614                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
17615             IsFastSt &&
17616             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17617                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
17618             IsFastLd) {
17619           LastLegalIntegerType = i + 1;
17620           DoIntegerTruncate = true;
17621         }
17622       }
17623     }
17624
17625     // Only use vector types if the vector type is larger than the integer
17626     // type. If they are the same, use integers.
17627     bool UseVectorTy =
17628         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
          // NOTE(review): the maximum includes LastLegalVectorType even when
          // AllowVectors is false, in which case an integer type of that
          // (vector-derived) width is built below - confirm this is intended.
17629     unsigned LastLegalType =
17630         std::max(LastLegalVectorType, LastLegalIntegerType);
17631
17632     // We add +1 here because the LastXXX variables refer to location while
17633     // the NumElem refers to array/index size.
17634     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17635     NumElem = std::min(LastLegalType, NumElem);
17636     Align FirstLoadAlign = FirstLoad->getAlign();
17637
17638     if (NumElem < 2) {
17639       // We know that candidate stores are in order and of correct
17640       // shape. While there is no mergeable sequence from the
17641       // beginning one may start later in the sequence. The only
17642       // reason a merge of size N could have failed where another of
17643       // the same size would not have is if the alignment or either
17644       // the load or store has improved. Drop as many candidates as we
17645       // can here.
17646       unsigned NumSkip = 1;
17647       while ((NumSkip < LoadNodes.size()) &&
17648              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17649              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17650         NumSkip++;
17651       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17652       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17653       NumConsecutiveStores -= NumSkip;
17654       continue;
17655     }
17656
17657     // Check that we can merge these candidates without causing a cycle.
17658     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17659                                                   RootNode)) {
17660       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17661       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17662       NumConsecutiveStores -= NumElem;
17663       continue;
17664     }
17665
17666     // Find if it is better to use vectors or integers to load and store
17667     // to memory.
17668     EVT JointMemOpVT;
17669     if (UseVectorTy) {
17670       // Find a legal type for the vector store.
17671       unsigned Elts = NumElem * NumMemElts;
17672       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17673     } else {
17674       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17675       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17676     }
17677
17678     SDLoc LoadDL(LoadNodes[0].MemNode);
17679     SDLoc StoreDL(StoreNodes[0].MemNode);
17680
17681     // The merged loads are required to have the same incoming chain, so
17682     // using the first's chain is acceptable.
17683
17684     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17685     AddToWorklist(NewStoreChain.getNode());
17686
17687     MachineMemOperand::Flags LdMMOFlags =
17688         isDereferenceable ? MachineMemOperand::MODereferenceable
17689                           : MachineMemOperand::MONone;
17690     if (IsNonTemporalLoad)
17691       LdMMOFlags |= MachineMemOperand::MONonTemporal;
17692
17693     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17694                                               ? MachineMemOperand::MONonTemporal
17695                                               : MachineMemOperand::MONone;
17696
17697     SDValue NewLoad, NewStore;
          // Plain load + store of JointMemOpVT, unless the integer type chosen
          // above is only usable via an extending load and truncating store.
17698     if (UseVectorTy || !DoIntegerTruncate) {
17699       NewLoad = DAG.getLoad(
17700           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17701           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17702       SDValue StoreOp = NewLoad;
17703       if (NeedRotate) {
17704         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17705         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17706                "Unexpected type for rotate-able load pair");
              // Rotating by half the width exchanges the two halves.
17707         SDValue RotAmt =
17708             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17709         // Target can convert to the identical ROTR if it does not have ROTL.
17710         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17711       }
17712       NewStore = DAG.getStore(
17713           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17714           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17715     } else { // This must be the truncstore/extload case
            // NeedRotate is not consulted on this path.
            // NOTE(review): the truncating store below takes the first store's
            // own memory-operand flags rather than StMMOFlags - confirm the
            // difference from the plain-store path is intended.
17716       EVT ExtendedTy =
17717           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17718       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17719                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
17720                                FirstLoad->getPointerInfo(), JointMemOpVT,
17721                                FirstLoadAlign, LdMMOFlags);
17722       NewStore = DAG.getTruncStore(
17723           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17724           FirstInChain->getPointerInfo(), JointMemOpVT,
17725           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17726     }
17727
17728     // Transfer chain users from old loads to the new load.
17729     for (unsigned i = 0; i < NumElem; ++i) {
17730       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
            // Result number 1 of a load is used here as its chain output.
17731       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17732                                     SDValue(NewLoad.getNode(), 1));
17733     }
17734
17735     // Replace all stores with the new store. Recursively remove corresponding
17736     // values if they are no longer used.
17737     for (unsigned i = 0; i < NumElem; ++i) {
            // Capture the stored value before the store is replaced so it can
            // still be inspected afterwards.
17738       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17739       CombineTo(StoreNodes[i].MemNode, NewStore);
17740       if (Val.getNode()->use_empty())
17741         recursivelyDeleteUnusedNodes(Val.getNode());
17742     }
17743
17744     MadeChange = true;
17745     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17746     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17747     NumConsecutiveStores -= NumElem;
17748   }
17749   return MadeChange;
17750 }
17751
/// Entry point for merging stores that are adjacent in memory, starting from
/// \p St.
///
/// Returns false without doing anything when OptLevel is CodeGenOpt::None,
/// EnableStoreMerging is off, the memory type is a scalable vector or not a
/// simple value type, two elements would already exceed
/// MaximumLegalStoreInBits, the memory type is not byte-sized, or the stored
/// value's source kind is StoreSource::Unknown.
///
/// Otherwise the merge candidates are collected, sorted by their offset from
/// the base pointer, and the leading run of consecutive stores is repeatedly
/// handed to the helper matching the source kind (constants, extracted vector
/// elements, or loads).
///
/// \returns true if any helper reported a change.
17752 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17753   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17754     return false;
17755
17756   // TODO: Extend this function to merge stores of scalable vectors.
17757   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17758   // store since we know <vscale x 16 x i8> is exactly twice as large as
17759   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17760   EVT MemVT = St->getMemoryVT();
17761   if (MemVT.isScalableVector())
17762     return false;
        // Even the smallest merge (two elements) must fit in the largest legal
        // store.
17763   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17764     return false;
17765
17766   // This function cannot currently deal with non-byte-sized memory sizes.
17767   int64_t ElementSizeBytes = MemVT.getStoreSize();
17768   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17769     return false;
17770
17771   // Do not bother looking at stored values that are not constants, loads, or
17772   // extracted vector elements.
17773   SDValue StoredVal = peekThroughBitcasts(St->getValue());
17774   const StoreSource StoreSrc = getStoreSource(StoredVal);
17775   if (StoreSrc == StoreSource::Unknown)
17776     return false;
17777
17778   SmallVector<MemOpLink, 8> StoreNodes;
        // Declared without an initializer; presumably set by
        // getStoreMergeCandidates below - confirm against its definition.
17779   SDNode *RootNode;
17780   // Find potential store merge candidates by searching through chain sub-DAG
17781   getStoreMergeCandidates(St, StoreNodes, RootNode);
17782
17783   // Check if there is anything to merge.
17784   if (StoreNodes.size() < 2)
17785     return false;
17786
17787   // Sort the memory operands according to their distance from the
17788   // base pointer.
17789   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17790     return LHS.OffsetFromBase < RHS.OffsetFromBase;
17791   });
17792
17793   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17794       Attribute::NoImplicitFloat);
17795   bool IsNonTemporalStore = St->isNonTemporal();
        // The cast<> is only evaluated when the source kind is Load, thanks to
        // short-circuit evaluation of &&.
17796   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17797                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
17798
17799   // Store Merge attempts to merge the lowest stores first. This generally
17800   // works out because, if successful, the remaining stores are checked
17801   // after the first collection of stores is merged. However, in the
17802   // case that a non-mergeable store is found first, e.g., {p[-2],
17803   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17804   // mergeable cases. To prevent this, we prune such stores from the
17805   // front of StoreNodes here.
17806   bool MadeChange = false;
17807   while (StoreNodes.size() > 1) {
17808     unsigned NumConsecutiveStores =
17809         getConsecutiveStores(StoreNodes, ElementSizeBytes);
17810     // There are no more stores in the list to examine.
17811     if (NumConsecutiveStores == 0)
17812       return MadeChange;
17813
17814     // We have at least 2 consecutive stores. Try to merge them.
17815     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
          // Each helper erases the candidates it merged or rejected from the
          // front of StoreNodes, which is what shrinks the list for the
          // enclosing loop.
17816     switch (StoreSrc) {
17817     case StoreSource::Constant:
17818       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17819                                              MemVT, RootNode, AllowVectors);
17820       break;
17821
17822     case StoreSource::Extract:
17823       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17824                                             MemVT, RootNode);
17825       break;
17826
17827     case StoreSource::Load:
17828       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17829                                          MemVT, RootNode, AllowVectors,
17830                                          IsNonTemporalStore, IsNonTemporalLoad);
17831       break;
17832
17833     default:
            // StoreSource::Unknown was rejected above.
17834       llvm_unreachable("Unhandled store source type");
17835     }
17836   }
17837   return MadeChange;
17838 }
17839
/// Re-create store \p ST on top of \p BetterChain and replace the original
/// store with a TokenFactor that joins the original chain and the new store.
///
/// The replacement store keeps ST's value, base pointer and memory operand; a
/// truncating store is rebuilt as a truncating store with the same memory VT.
///
/// \param ST the store to re-chain.
/// \param BetterChain the chain the replacement store should use.
/// \returns the result of CombineTo(ST, Token, false).
17840 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17841   SDLoc SL(ST);
17842   SDValue ReplStore;
17843
17844   // Replace the chain to avoid dependency.
17845   if (ST->isTruncatingStore()) {
17846     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17847                                   ST->getBasePtr(), ST->getMemoryVT(),
17848                                   ST->getMemOperand());
17849   } else {
17850     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17851                              ST->getMemOperand());
17852   }
17853
17854   // Create token to keep both nodes around.
17855   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17856                               MVT::Other, ST->getChain(), ReplStore);
17857
17858   // Make sure the new and old chains are cleaned up.
17859   AddToWorklist(Token.getNode());
17860
17861   // Don't add users to work list.
17862   return CombineTo(ST, Token, false);
17863 }
17864
/// Try to replace a store of a floating-point constant with integer store(s)
/// of the same bit pattern.
///
/// - f32 becomes a single i32 store.
/// - f64 becomes a single i64 store or, failing that, two i32 stores joined by
///   a TokenFactor.
/// - f16, f80, f128 and ppcf128 are left alone.
///
/// The stored value must be a ConstantFPSDNode unless its opcode is
/// ISD::TargetConstantFP (the cast<> below requires it).
///
/// \param ST the store whose value is a floating-point constant.
/// \returns the replacement store (or TokenFactor of two stores), or an empty
///          SDValue if no replacement was made.
17865 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17866   SDValue Value = ST->getValue();
        // Target constants are left untouched.
17867   if (Value.getOpcode() == ISD::TargetConstantFP)
17868     return SDValue();
17869
17870   if (!ISD::isNormalStore(ST))
17871     return SDValue();
17872
17873   SDLoc DL(ST);
17874
17875   SDValue Chain = ST->getChain();
17876   SDValue Ptr = ST->getBasePtr();
17877
17878   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17879
17880   // NOTE: If the original store is volatile, this transform must not increase
17881   // the number of stores.  For example, on x86-32 an f64 can be stored in one
17882   // processor operation but an i64 (which is not legal) requires two.  So the
17883   // transform should not be done in this case.
17884
17885   SDValue Tmp;
17886   switch (CFP->getSimpleValueType(0).SimpleTy) {
17887   default:
17888     llvm_unreachable("Unknown FP type");
17889   case MVT::f16:    // We don't do this for these yet.
17890   case MVT::f80:
17891   case MVT::f128:
17892   case MVT::ppcf128:
17893     return SDValue();
17894   case MVT::f32:
          // NOTE(review): this case calls the unqualified isTypeLegal while the
          // f64 case calls TLI.isTypeLegal - confirm the difference is
          // intentional.
17895     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17896         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
            // The lone ';' below is an empty statement with no effect.
17897       ;
            // Store the constant's 32-bit pattern as an integer.
17898       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17899                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17900                             MVT::i32);
17901       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17902     }
17903
17904     return SDValue();
17905   case MVT::f64:
17906     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17907          ST->isSimple()) ||
17908         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
            // The lone ';' below is an empty statement with no effect.
17909       ;
            // Store the constant's 64-bit pattern as a single integer.
17910       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17911                             getZExtValue(), SDLoc(CFP), MVT::i64);
17912       return DAG.getStore(Chain, DL, Tmp,
17913                           Ptr, ST->getMemOperand());
17914     }
17915
          // Splitting produces two stores, so it is only attempted when
          // ST->isSimple() holds (see the NOTE above the switch).
17916     if (ST->isSimple() &&
17917         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17918       // Many FP stores are not made apparent until after legalize, e.g. for
17919       // argument passing.  Since this is so common, custom legalize the
17920       // 64-bit integer store into two 32-bit stores.
17921       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17922       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17923       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
            // Lo is written at offset 0 and Hi at offset 4; on big-endian
            // targets the two halves trade places.
17924       if (DAG.getDataLayout().isBigEndian())
17925         std::swap(Lo, Hi);
17926
17927       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17928       AAMDNodes AAInfo = ST->getAAInfo();
17929
            // Both halves hang off the original chain and are joined by a
            // TokenFactor; both are given ST->getOriginalAlign().
17930       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17931                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17932       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17933       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17934                                  ST->getPointerInfo().getWithOffset(4),
17935                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17936       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17937                          St0, St1);
17938     }
17939
17940     return SDValue();
17941   }
17942 }
17943
17944 SDValue DAGCombiner::visitSTORE(SDNode *N) {
17945   StoreSDNode *ST  = cast<StoreSDNode>(N);
17946   SDValue Chain = ST->getChain();
17947   SDValue Value = ST->getValue();
17948   SDValue Ptr   = ST->getBasePtr();
17949
17950   // If this is a store of a bit convert, store the input value if the
17951   // resultant store does not need a higher alignment than the original.
17952   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17953       ST->isUnindexed()) {
17954     EVT SVT = Value.getOperand(0).getValueType();
17955     // If the store is volatile, we only want to change the store type if the
17956     // resulting store is legal. Otherwise we might increase the number of
17957     // memory accesses. We don't care if the original type was legal or not
17958     // as we assume software couldn't rely on the number of accesses of an
17959     // illegal type.
17960     // TODO: May be able to relax for unordered atomics (see D66309)
17961     if (((!LegalOperations && ST->isSimple()) ||
17962          TLI.isOperationLegal(ISD::STORE, SVT)) &&
17963         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17964                                      DAG, *ST->getMemOperand())) {
17965       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17966                           ST->getMemOperand());
17967     }
17968   }
17969
17970   // Turn 'store undef, Ptr' -> nothing.
17971   if (Value.isUndef() && ST->isUnindexed())
17972     return Chain;
17973
17974   // Try to infer better alignment information than the store already has.
17975   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17976     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17977       if (*Alignment > ST->getAlign() &&
17978           isAligned(*Alignment, ST->getSrcValueOffset())) {
17979         SDValue NewStore =
17980             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17981                               ST->getMemoryVT(), *Alignment,
17982                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
17983         // NewStore will always be N as we are only refining the alignment
17984         assert(NewStore.getNode() == N);
17985         (void)NewStore;
17986       }
17987     }
17988   }
17989
17990   // Try transforming a pair floating point load / store ops to integer
17991   // load / store ops.
17992   if (SDValue NewST = TransformFPLoadStorePair(N))
17993     return NewST;
17994
17995   // Try transforming several stores into STORE (BSWAP).
17996   if (SDValue Store = mergeTruncStores(ST))
17997     return Store;
17998
17999   if (ST->isUnindexed()) {
18000     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18001     // adjacent stores.
18002     if (findBetterNeighborChains(ST)) {
18003       // replaceStoreChain uses CombineTo, which handled all of the worklist
18004       // manipulation. Return the original node to not do anything else.
18005       return SDValue(ST, 0);
18006     }
18007     Chain = ST->getChain();
18008   }
18009
18010   // FIXME: is there such a thing as a truncating indexed store?
18011   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18012       Value.getValueType().isInteger() &&
18013       (!isa<ConstantSDNode>(Value) ||
18014        !cast<ConstantSDNode>(Value)->isOpaque())) {
18015     APInt TruncDemandedBits =
18016         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18017                              ST->getMemoryVT().getScalarSizeInBits());
18018
18019     // See if we can simplify the input to this truncstore with knowledge that
18020     // only the low bits are being used.  For example:
18021     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18022     AddToWorklist(Value.getNode());
18023     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18024       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18025                                ST->getMemOperand());
18026
18027     // Otherwise, see if we can simplify the operation with
18028     // SimplifyDemandedBits, which only works if the value has a single use.
18029     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18030       // Re-visit the store if anything changed and the store hasn't been merged
18031       // with another node (N is deleted) SimplifyDemandedBits will add Value's
18032       // node back to the worklist if necessary, but we also need to re-visit
18033       // the Store node itself.
18034       if (N->getOpcode() != ISD::DELETED_NODE)
18035         AddToWorklist(N);
18036       return SDValue(N, 0);
18037     }
18038   }
18039
18040   // If this is a load followed by a store to the same location, then the store
18041   // is dead/noop.
18042   // TODO: Can relax for unordered atomics (see D66309)
18043   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18044     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18045         ST->isUnindexed() && ST->isSimple() &&
18046         Ld->getAddressSpace() == ST->getAddressSpace() &&
18047         // There can't be any side effects between the load and store, such as
18048         // a call or store.
18049         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18050       // The store is dead, remove it.
18051       return Chain;
18052     }
18053   }
18054
18055   // TODO: Can relax for unordered atomics (see D66309)
18056   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18057     if (ST->isUnindexed() && ST->isSimple() &&
18058         ST1->isUnindexed() && ST1->isSimple()) {
18059       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18060           ST->getMemoryVT() == ST1->getMemoryVT() &&
18061           ST->getAddressSpace() == ST1->getAddressSpace()) {
18062         // If this is a store followed by a store with the same value to the
18063         // same location, then the store is dead/noop.
18064         return Chain;
18065       }
18066
18067       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18068           !ST1->getBasePtr().isUndef() &&
18069           // BaseIndexOffset and the code below requires knowing the size
18070           // of a vector, so bail out if MemoryVT is scalable.
18071           !ST->getMemoryVT().isScalableVector() &&
18072           !ST1->getMemoryVT().isScalableVector() &&
18073           ST->getAddressSpace() == ST1->getAddressSpace()) {
18074         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18075         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18076         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18077         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store (ST1) writes only a subset of the location
        // written by this store, and no other node is chained to ST1, we can
        // effectively drop ST1. Do not remove stores to undef as they may
        // be used as data sinks.
18082         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18083           CombineTo(ST1, ST1->getChain());
18084           return SDValue();
18085         }
18086       }
18087     }
18088   }
18089
18090   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18091   // truncating store.  We can do this even if this is already a truncstore.
18092   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
18093       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18094       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
18095                             ST->getMemoryVT())) {
18096     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18097                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
18098   }
18099
18100   // Always perform this optimization before types are legal. If the target
18101   // prefers, also try this after legalization to catch stores that were created
18102   // by intrinsics or other nodes.
18103   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18104     while (true) {
18105       // There can be multiple store sequences on the same chain.
18106       // Keep trying to merge store sequences until we are unable to do so
18107       // or until we merge the last store on the chain.
18108       bool Changed = mergeConsecutiveStores(ST);
18109       if (!Changed) break;
18110       // Return N as merge only uses CombineTo and no worklist clean
18111       // up is necessary.
18112       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18113         return SDValue(N, 0);
18114     }
18115   }
18116
18117   // Try transforming N to an indexed store.
18118   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18119     return SDValue(N, 0);
18120
18121   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18122   //
18123   // Make sure to do this only after attempting to merge stores in order to
18124   //  avoid changing the types of some subset of stores due to visit order,
18125   //  preventing their merging.
18126   if (isa<ConstantFPSDNode>(ST->getValue())) {
18127     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18128       return NewSt;
18129   }
18130
18131   if (SDValue NewSt = splitMergedValStore(ST))
18132     return NewSt;
18133
18134   return ReduceLoadOpStoreWidth(N);
18135 }
18136
18137 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18138   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18139   if (!LifetimeEnd->hasOffset())
18140     return SDValue();
18141
18142   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18143                                         LifetimeEnd->getOffset(), false);
18144
18145   // We walk up the chains to find stores.
18146   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18147   while (!Chains.empty()) {
18148     SDValue Chain = Chains.pop_back_val();
18149     if (!Chain.hasOneUse())
18150       continue;
18151     switch (Chain.getOpcode()) {
18152     case ISD::TokenFactor:
18153       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18154         Chains.push_back(Chain.getOperand(--Nops));
18155       break;
18156     case ISD::LIFETIME_START:
18157     case ISD::LIFETIME_END:
18158       // We can forward past any lifetime start/end that can be proven not to
18159       // alias the node.
18160       if (!isAlias(Chain.getNode(), N))
18161         Chains.push_back(Chain.getOperand(0));
18162       break;
18163     case ISD::STORE: {
18164       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18165       // TODO: Can relax for unordered atomics (see D66309)
18166       if (!ST->isSimple() || ST->isIndexed())
18167         continue;
18168       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18169       // The bounds of a scalable store are not known until runtime, so this
18170       // store cannot be elided.
18171       if (StoreSize.isScalable())
18172         continue;
18173       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18174       // If we store purely within object bounds just before its lifetime ends,
18175       // we can remove the store.
18176       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18177                                    StoreSize.getFixedSize() * 8)) {
18178         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18179                    dbgs() << "\nwithin LIFETIME_END of : ";
18180                    LifetimeEndBase.dump(); dbgs() << "\n");
18181         CombineTo(ST, ST->getChain());
18182         return SDValue(N, 0);
18183       }
18184     }
18185     }
18186   }
18187   return SDValue();
18188 }
18189
18190 /// For the instruction sequence of store below, F and I values
18191 /// are bundled together as an i64 value before being stored into memory.
18192 /// Sometimes it is more efficent to generate separate stores for F and I,
18193 /// which can remove the bitwise instructions or sink them to colder places.
18194 ///
18195 ///   (store (or (zext (bitcast F to i32) to i64),
18196 ///              (shl (zext I to i64), 32)), addr)  -->
18197 ///   (store F, addr) and (store I, addr+4)
18198 ///
18199 /// Similarly, splitting for other merged store can also be beneficial, like:
18200 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18201 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18202 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18203 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18204 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18205 ///
18206 /// We allow each target to determine specifically which kind of splitting is
18207 /// supported.
18208 ///
18209 /// The store patterns are commonly seen from the simple code snippet below
18210 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
18211 ///   void goo(const std::pair<int, float> &);
18212 ///   hoo() {
18213 ///     ...
18214 ///     goo(std::make_pair(tmp, ftmp));
18215 ///     ...
18216 ///   }
18217 ///
18218 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18219   if (OptLevel == CodeGenOpt::None)
18220     return SDValue();
18221
18222   // Can't change the number of memory accesses for a volatile store or break
18223   // atomicity for an atomic one.
18224   if (!ST->isSimple())
18225     return SDValue();
18226
18227   SDValue Val = ST->getValue();
18228   SDLoc DL(ST);
18229
18230   // Match OR operand.
18231   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18232     return SDValue();
18233
18234   // Match SHL operand and get Lower and Higher parts of Val.
18235   SDValue Op1 = Val.getOperand(0);
18236   SDValue Op2 = Val.getOperand(1);
18237   SDValue Lo, Hi;
18238   if (Op1.getOpcode() != ISD::SHL) {
18239     std::swap(Op1, Op2);
18240     if (Op1.getOpcode() != ISD::SHL)
18241       return SDValue();
18242   }
18243   Lo = Op2;
18244   Hi = Op1.getOperand(0);
18245   if (!Op1.hasOneUse())
18246     return SDValue();
18247
18248   // Match shift amount to HalfValBitSize.
18249   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18250   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18251   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18252     return SDValue();
18253
18254   // Lo and Hi are zero-extended from int with size less equal than 32
18255   // to i64.
18256   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18257       !Lo.getOperand(0).getValueType().isScalarInteger() ||
18258       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18259       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18260       !Hi.getOperand(0).getValueType().isScalarInteger() ||
18261       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18262     return SDValue();
18263
18264   // Use the EVT of low and high parts before bitcast as the input
18265   // of target query.
18266   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18267                   ? Lo.getOperand(0).getValueType()
18268                   : Lo.getValueType();
18269   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18270                    ? Hi.getOperand(0).getValueType()
18271                    : Hi.getValueType();
18272   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18273     return SDValue();
18274
18275   // Start to split store.
18276   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18277   AAMDNodes AAInfo = ST->getAAInfo();
18278
18279   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18280   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18281   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18282   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18283
18284   SDValue Chain = ST->getChain();
18285   SDValue Ptr = ST->getBasePtr();
18286   // Lower value store.
18287   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18288                              ST->getOriginalAlign(), MMOFlags, AAInfo);
18289   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18290   // Higher value store.
18291   SDValue St1 = DAG.getStore(
18292       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18293       ST->getOriginalAlign(), MMOFlags, AAInfo);
18294   return St1;
18295 }
18296
18297 /// Convert a disguised subvector insertion into a shuffle:
18298 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18299   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18300          "Expected extract_vector_elt");
18301   SDValue InsertVal = N->getOperand(1);
18302   SDValue Vec = N->getOperand(0);
18303
18304   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18305   // InsIndex)
18306   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
18307   //   CONCAT_VECTORS.
18308   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18309       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18310       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18311     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18312     ArrayRef<int> Mask = SVN->getMask();
18313
18314     SDValue X = Vec.getOperand(0);
18315     SDValue Y = Vec.getOperand(1);
18316
18317     // Vec's operand 0 is using indices from 0 to N-1 and
18318     // operand 1 from N to 2N - 1, where N is the number of
18319     // elements in the vectors.
18320     SDValue InsertVal0 = InsertVal.getOperand(0);
18321     int ElementOffset = -1;
18322
18323     // We explore the inputs of the shuffle in order to see if we find the
18324     // source of the extract_vector_elt. If so, we can use it to modify the
18325     // shuffle rather than perform an insert_vector_elt.
18326     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18327     ArgWorkList.emplace_back(Mask.size(), Y);
18328     ArgWorkList.emplace_back(0, X);
18329
18330     while (!ArgWorkList.empty()) {
18331       int ArgOffset;
18332       SDValue ArgVal;
18333       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18334
18335       if (ArgVal == InsertVal0) {
18336         ElementOffset = ArgOffset;
18337         break;
18338       }
18339
18340       // Peek through concat_vector.
18341       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18342         int CurrentArgOffset =
18343             ArgOffset + ArgVal.getValueType().getVectorNumElements();
18344         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18345         for (SDValue Op : reverse(ArgVal->ops())) {
18346           CurrentArgOffset -= Step;
18347           ArgWorkList.emplace_back(CurrentArgOffset, Op);
18348         }
18349
18350         // Make sure we went through all the elements and did not screw up index
18351         // computation.
18352         assert(CurrentArgOffset == ArgOffset);
18353       }
18354     }
18355
18356     if (ElementOffset != -1) {
18357       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18358
18359       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18360       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18361       assert(NewMask[InsIndex] <
18362                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18363              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18364
18365       SDValue LegalShuffle =
18366               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18367                                           Y, NewMask, DAG);
18368       if (LegalShuffle)
18369         return LegalShuffle;
18370     }
18371   }
18372
18373   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18374   // bitcast(shuffle (bitcast V), (extended X), Mask)
18375   // Note: We do not use an insert_subvector node because that requires a
18376   // legal subvector type.
18377   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18378       !InsertVal.getOperand(0).getValueType().isVector())
18379     return SDValue();
18380
18381   SDValue SubVec = InsertVal.getOperand(0);
18382   SDValue DestVec = N->getOperand(0);
18383   EVT SubVecVT = SubVec.getValueType();
18384   EVT VT = DestVec.getValueType();
18385   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18386   // If the source only has a single vector element, the cost of creating adding
18387   // it to a vector is likely to exceed the cost of a insert_vector_elt.
18388   if (NumSrcElts == 1)
18389     return SDValue();
18390   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18391   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18392
18393   // Step 1: Create a shuffle mask that implements this insert operation. The
18394   // vector that we are inserting into will be operand 0 of the shuffle, so
18395   // those elements are just 'i'. The inserted subvector is in the first
18396   // positions of operand 1 of the shuffle. Example:
18397   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18398   SmallVector<int, 16> Mask(NumMaskVals);
18399   for (unsigned i = 0; i != NumMaskVals; ++i) {
18400     if (i / NumSrcElts == InsIndex)
18401       Mask[i] = (i % NumSrcElts) + NumMaskVals;
18402     else
18403       Mask[i] = i;
18404   }
18405
18406   // Bail out if the target can not handle the shuffle we want to create.
18407   EVT SubVecEltVT = SubVecVT.getVectorElementType();
18408   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18409   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18410     return SDValue();
18411
18412   // Step 2: Create a wide vector from the inserted source vector by appending
18413   // undefined elements. This is the same size as our destination vector.
18414   SDLoc DL(N);
18415   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18416   ConcatOps[0] = SubVec;
18417   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18418
18419   // Step 3: Shuffle in the padded subvector.
18420   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18421   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18422   AddToWorklist(PaddedSubV.getNode());
18423   AddToWorklist(DestVecBC.getNode());
18424   AddToWorklist(Shuf.getNode());
18425   return DAG.getBitcast(VT, Shuf);
18426 }
18427
18428 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18429   SDValue InVec = N->getOperand(0);
18430   SDValue InVal = N->getOperand(1);
18431   SDValue EltNo = N->getOperand(2);
18432   SDLoc DL(N);
18433
18434   EVT VT = InVec.getValueType();
18435   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18436
18437   // Insert into out-of-bounds element is undefined.
18438   if (IndexC && VT.isFixedLengthVector() &&
18439       IndexC->getZExtValue() >= VT.getVectorNumElements())
18440     return DAG.getUNDEF(VT);
18441
18442   // Remove redundant insertions:
18443   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18444   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18445       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18446     return InVec;
18447
18448   if (!IndexC) {
18449     // If this is variable insert to undef vector, it might be better to splat:
18450     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18451     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18452       if (VT.isScalableVector())
18453         return DAG.getSplatVector(VT, DL, InVal);
18454       else {
18455         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18456         return DAG.getBuildVector(VT, DL, Ops);
18457       }
18458     }
18459     return SDValue();
18460   }
18461
18462   if (VT.isScalableVector())
18463     return SDValue();
18464
18465   unsigned NumElts = VT.getVectorNumElements();
18466
18467   // We must know which element is being inserted for folds below here.
18468   unsigned Elt = IndexC->getZExtValue();
18469   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18470     return Shuf;
18471
18472   // Canonicalize insert_vector_elt dag nodes.
18473   // Example:
18474   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18475   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18476   //
18477   // Do this only if the child insert_vector node has one use; also
18478   // do this only if indices are both constants and Idx1 < Idx0.
18479   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18480       && isa<ConstantSDNode>(InVec.getOperand(2))) {
18481     unsigned OtherElt = InVec.getConstantOperandVal(2);
18482     if (Elt < OtherElt) {
18483       // Swap nodes.
18484       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18485                                   InVec.getOperand(0), InVal, EltNo);
18486       AddToWorklist(NewOp.getNode());
18487       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18488                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18489     }
18490   }
18491
18492   // If we can't generate a legal BUILD_VECTOR, exit
18493   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18494     return SDValue();
18495
18496   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18497   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
18498   // vector elements.
18499   SmallVector<SDValue, 8> Ops;
18500   // Do not combine these two vectors if the output vector will not replace
18501   // the input vector.
18502   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18503     Ops.append(InVec.getNode()->op_begin(),
18504                InVec.getNode()->op_end());
18505   } else if (InVec.isUndef()) {
18506     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18507   } else {
18508     return SDValue();
18509   }
18510   assert(Ops.size() == NumElts && "Unexpected vector size");
18511
18512   // Insert the element
18513   if (Elt < Ops.size()) {
18514     // All the operands of BUILD_VECTOR must have the same type;
18515     // we enforce that here.
18516     EVT OpVT = Ops[0].getValueType();
18517     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18518   }
18519
18520   // Return the new vector
18521   return DAG.getBuildVector(VT, DL, Ops);
18522 }
18523
18524 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18525                                                   SDValue EltNo,
18526                                                   LoadSDNode *OriginalLoad) {
18527   assert(OriginalLoad->isSimple());
18528
18529   EVT ResultVT = EVE->getValueType(0);
18530   EVT VecEltVT = InVecVT.getVectorElementType();
18531
18532   // If the vector element type is not a multiple of a byte then we are unable
18533   // to correctly compute an address to load only the extracted element as a
18534   // scalar.
18535   if (!VecEltVT.isByteSized())
18536     return SDValue();
18537
18538   Align Alignment = OriginalLoad->getAlign();
18539   Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18540       VecEltVT.getTypeForEVT(*DAG.getContext()));
18541
18542   if (NewAlign > Alignment ||
18543       !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18544     return SDValue();
18545
18546   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18547     ISD::NON_EXTLOAD : ISD::EXTLOAD;
18548   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18549     return SDValue();
18550
18551   Alignment = NewAlign;
18552
18553   MachinePointerInfo MPI;
18554   SDLoc DL(EVE);
18555   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18556     int Elt = ConstEltNo->getZExtValue();
18557     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18558     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18559   } else {
18560     // Discard the pointer info except the address space because the memory
18561     // operand can't represent this new access since the offset is variable.
18562     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18563   }
18564   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18565                                                InVecVT, EltNo);
18566
18567   // The replacement we need to do here is a little tricky: we need to
18568   // replace an extractelement of a load with a load.
18569   // Use ReplaceAllUsesOfValuesWith to do the replacement.
18570   // Note that this replacement assumes that the extractvalue is the only
18571   // use of the load; that's okay because we don't want to perform this
18572   // transformation in other cases anyway.
18573   SDValue Load;
18574   SDValue Chain;
18575   if (ResultVT.bitsGT(VecEltVT)) {
18576     // If the result type of vextract is wider than the load, then issue an
18577     // extending load instead.
18578     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18579                                                   VecEltVT)
18580                                    ? ISD::ZEXTLOAD
18581                                    : ISD::EXTLOAD;
18582     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18583                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18584                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
18585                           OriginalLoad->getAAInfo());
18586     Chain = Load.getValue(1);
18587   } else {
18588     Load = DAG.getLoad(
18589         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18590         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18591     Chain = Load.getValue(1);
18592     if (ResultVT.bitsLT(VecEltVT))
18593       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18594     else
18595       Load = DAG.getBitcast(ResultVT, Load);
18596   }
18597   WorklistRemover DeadNodes(*this);
18598   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18599   SDValue To[] = { Load, Chain };
18600   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18601   // Make sure to revisit this node to clean it up; it will usually be dead.
18602   AddToWorklist(EVE);
18603   // Since we're explicitly calling ReplaceAllUses, add the new node to the
18604   // worklist explicitly as well.
18605   AddToWorklistWithUsers(Load.getNode());
18606   ++OpsNarrowed;
18607   return SDValue(EVE, 0);
18608 }
18609
18610 /// Transform a vector binary operation into a scalar binary operation by moving
18611 /// the math/logic after an extract element of a vector.
18612 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18613                                        bool LegalOperations) {
18614   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18615   SDValue Vec = ExtElt->getOperand(0);
18616   SDValue Index = ExtElt->getOperand(1);
18617   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18618   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18619       Vec.getNode()->getNumValues() != 1)
18620     return SDValue();
18621
18622   // Targets may want to avoid this to prevent an expensive register transfer.
18623   if (!TLI.shouldScalarizeBinop(Vec))
18624     return SDValue();
18625
18626   // Extracting an element of a vector constant is constant-folded, so this
18627   // transform is just replacing a vector op with a scalar op while moving the
18628   // extract.
18629   SDValue Op0 = Vec.getOperand(0);
18630   SDValue Op1 = Vec.getOperand(1);
18631   if (isAnyConstantBuildVector(Op0, true) ||
18632       isAnyConstantBuildVector(Op1, true)) {
18633     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18634     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18635     SDLoc DL(ExtElt);
18636     EVT VT = ExtElt->getValueType(0);
18637     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18638     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18639     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18640   }
18641
18642   return SDValue();
18643 }
18644
18645 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18646   SDValue VecOp = N->getOperand(0);
18647   SDValue Index = N->getOperand(1);
18648   EVT ScalarVT = N->getValueType(0);
18649   EVT VecVT = VecOp.getValueType();
18650   if (VecOp.isUndef())
18651     return DAG.getUNDEF(ScalarVT);
18652
18653   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18654   //
18655   // This only really matters if the index is non-constant since other combines
18656   // on the constant elements already work.
18657   SDLoc DL(N);
18658   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18659       Index == VecOp.getOperand(2)) {
18660     SDValue Elt = VecOp.getOperand(1);
18661     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18662   }
18663
18664   // (vextract (scalar_to_vector val, 0) -> val
18665   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18666     // Only 0'th element of SCALAR_TO_VECTOR is defined.
18667     if (DAG.isKnownNeverZero(Index))
18668       return DAG.getUNDEF(ScalarVT);
18669
18670     // Check if the result type doesn't match the inserted element type. A
18671     // SCALAR_TO_VECTOR may truncate the inserted element and the
18672     // EXTRACT_VECTOR_ELT may widen the extracted vector.
18673     SDValue InOp = VecOp.getOperand(0);
18674     if (InOp.getValueType() != ScalarVT) {
18675       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18676       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18677     }
18678     return InOp;
18679   }
18680
18681   // extract_vector_elt of out-of-bounds element -> UNDEF
18682   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18683   if (IndexC && VecVT.isFixedLengthVector() &&
18684       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18685     return DAG.getUNDEF(ScalarVT);
18686
18687   // extract_vector_elt (build_vector x, y), 1 -> y
18688   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18689        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18690       TLI.isTypeLegal(VecVT) &&
18691       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18692     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18693             VecVT.isFixedLengthVector()) &&
18694            "BUILD_VECTOR used for scalable vectors");
18695     unsigned IndexVal =
18696         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18697     SDValue Elt = VecOp.getOperand(IndexVal);
18698     EVT InEltVT = Elt.getValueType();
18699
18700     // Sometimes build_vector's scalar input types do not match result type.
18701     if (ScalarVT == InEltVT)
18702       return Elt;
18703
18704     // TODO: It may be useful to truncate if free if the build_vector implicitly
18705     // converts.
18706   }
18707
18708   if (VecVT.isScalableVector())
18709     return SDValue();
18710
18711   // All the code from this point onwards assumes fixed width vectors, but it's
18712   // possible that some of the combinations could be made to work for scalable
18713   // vectors too.
18714   unsigned NumElts = VecVT.getVectorNumElements();
18715   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18716
18717   // TODO: These transforms should not require the 'hasOneUse' restriction, but
18718   // there are regressions on multiple targets without it. We can end up with a
18719   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18720   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18721       VecOp.hasOneUse()) {
18722     // The vector index of the LSBs of the source depend on the endian-ness.
18723     bool IsLE = DAG.getDataLayout().isLittleEndian();
18724     unsigned ExtractIndex = IndexC->getZExtValue();
18725     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18726     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18727     SDValue BCSrc = VecOp.getOperand(0);
18728     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18729       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18730
18731     if (LegalTypes && BCSrc.getValueType().isInteger() &&
18732         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18733       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18734       // trunc i64 X to i32
18735       SDValue X = BCSrc.getOperand(0);
18736       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18737              "Extract element and scalar to vector can't change element type "
18738              "from FP to integer.");
18739       unsigned XBitWidth = X.getValueSizeInBits();
18740       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18741
18742       // An extract element return value type can be wider than its vector
18743       // operand element type. In that case, the high bits are undefined, so
18744       // it's possible that we may need to extend rather than truncate.
18745       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18746         assert(XBitWidth % VecEltBitWidth == 0 &&
18747                "Scalar bitwidth must be a multiple of vector element bitwidth");
18748         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18749       }
18750     }
18751   }
18752
18753   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18754     return BO;
18755
18756   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18757   // We only perform this optimization before the op legalization phase because
18758   // we may introduce new vector instructions which are not backed by TD
18759   // patterns. For example on AVX, extracting elements from a wide vector
18760   // without using extract_subvector. However, if we can find an underlying
18761   // scalar value, then we can always use that.
18762   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18763     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18764     // Find the new index to extract from.
18765     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18766
18767     // Extracting an undef index is undef.
18768     if (OrigElt == -1)
18769       return DAG.getUNDEF(ScalarVT);
18770
18771     // Select the right vector half to extract from.
18772     SDValue SVInVec;
18773     if (OrigElt < (int)NumElts) {
18774       SVInVec = VecOp.getOperand(0);
18775     } else {
18776       SVInVec = VecOp.getOperand(1);
18777       OrigElt -= NumElts;
18778     }
18779
18780     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18781       SDValue InOp = SVInVec.getOperand(OrigElt);
18782       if (InOp.getValueType() != ScalarVT) {
18783         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18784         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18785       }
18786
18787       return InOp;
18788     }
18789
18790     // FIXME: We should handle recursing on other vector shuffles and
18791     // scalar_to_vector here as well.
18792
18793     if (!LegalOperations ||
18794         // FIXME: Should really be just isOperationLegalOrCustom.
18795         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18796         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18797       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18798                          DAG.getVectorIdxConstant(OrigElt, DL));
18799     }
18800   }
18801
18802   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18803   // simplify it based on the (valid) extraction indices.
18804   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18805         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18806                Use->getOperand(0) == VecOp &&
18807                isa<ConstantSDNode>(Use->getOperand(1));
18808       })) {
18809     APInt DemandedElts = APInt::getNullValue(NumElts);
18810     for (SDNode *Use : VecOp->uses()) {
18811       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18812       if (CstElt->getAPIntValue().ult(NumElts))
18813         DemandedElts.setBit(CstElt->getZExtValue());
18814     }
18815     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18816       // We simplified the vector operand of this extract element. If this
18817       // extract is not dead, visit it again so it is folded properly.
18818       if (N->getOpcode() != ISD::DELETED_NODE)
18819         AddToWorklist(N);
18820       return SDValue(N, 0);
18821     }
18822     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18823     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18824       // We simplified the vector operand of this extract element. If this
18825       // extract is not dead, visit it again so it is folded properly.
18826       if (N->getOpcode() != ISD::DELETED_NODE)
18827         AddToWorklist(N);
18828       return SDValue(N, 0);
18829     }
18830   }
18831
18832   // Everything under here is trying to match an extract of a loaded value.
18833   // If the result of load has to be truncated, then it's not necessarily
18834   // profitable.
18835   bool BCNumEltsChanged = false;
18836   EVT ExtVT = VecVT.getVectorElementType();
18837   EVT LVT = ExtVT;
18838   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18839     return SDValue();
18840
18841   if (VecOp.getOpcode() == ISD::BITCAST) {
18842     // Don't duplicate a load with other uses.
18843     if (!VecOp.hasOneUse())
18844       return SDValue();
18845
18846     EVT BCVT = VecOp.getOperand(0).getValueType();
18847     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18848       return SDValue();
18849     if (NumElts != BCVT.getVectorNumElements())
18850       BCNumEltsChanged = true;
18851     VecOp = VecOp.getOperand(0);
18852     ExtVT = BCVT.getVectorElementType();
18853   }
18854
18855   // extract (vector load $addr), i --> load $addr + i * size
18856   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18857       ISD::isNormalLoad(VecOp.getNode()) &&
18858       !Index->hasPredecessor(VecOp.getNode())) {
18859     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18860     if (VecLoad && VecLoad->isSimple())
18861       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18862   }
18863
18864   // Perform only after legalization to ensure build_vector / vector_shuffle
18865   // optimizations have already been done.
18866   if (!LegalOperations || !IndexC)
18867     return SDValue();
18868
18869   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18870   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18871   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18872   int Elt = IndexC->getZExtValue();
18873   LoadSDNode *LN0 = nullptr;
18874   if (ISD::isNormalLoad(VecOp.getNode())) {
18875     LN0 = cast<LoadSDNode>(VecOp);
18876   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18877              VecOp.getOperand(0).getValueType() == ExtVT &&
18878              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18879     // Don't duplicate a load with other uses.
18880     if (!VecOp.hasOneUse())
18881       return SDValue();
18882
18883     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18884   }
18885   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18886     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18887     // =>
18888     // (load $addr+1*size)
18889
18890     // Don't duplicate a load with other uses.
18891     if (!VecOp.hasOneUse())
18892       return SDValue();
18893
18894     // If the bit convert changed the number of elements, it is unsafe
18895     // to examine the mask.
18896     if (BCNumEltsChanged)
18897       return SDValue();
18898
18899     // Select the input vector, guarding against out of range extract vector.
18900     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18901     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18902
18903     if (VecOp.getOpcode() == ISD::BITCAST) {
18904       // Don't duplicate a load with other uses.
18905       if (!VecOp.hasOneUse())
18906         return SDValue();
18907
18908       VecOp = VecOp.getOperand(0);
18909     }
18910     if (ISD::isNormalLoad(VecOp.getNode())) {
18911       LN0 = cast<LoadSDNode>(VecOp);
18912       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18913       Index = DAG.getConstant(Elt, DL, Index.getValueType());
18914     }
18915   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18916              VecVT.getVectorElementType() == ScalarVT &&
18917              (!LegalTypes ||
18918               TLI.isTypeLegal(
18919                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18920     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18921     //      -> extract_vector_elt a, 0
18922     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18923     //      -> extract_vector_elt a, 1
18924     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18925     //      -> extract_vector_elt b, 0
18926     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18927     //      -> extract_vector_elt b, 1
18928     SDLoc SL(N);
18929     EVT ConcatVT = VecOp.getOperand(0).getValueType();
18930     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18931     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18932                                      Index.getValueType());
18933
18934     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18935     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18936                               ConcatVT.getVectorElementType(),
18937                               ConcatOp, NewIdx);
18938     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18939   }
18940
18941   // Make sure we found a non-volatile load and the extractelement is
18942   // the only use.
18943   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
18944     return SDValue();
18945
18946   // If Idx was -1 above, Elt is going to be -1, so just return undef.
18947   if (Elt == -1)
18948     return DAG.getUNDEF(LVT);
18949
18950   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18951 }
18952
18953 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
18954 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18955   // We perform this optimization post type-legalization because
18956   // the type-legalizer often scalarizes integer-promoted vectors.
18957   // Performing this optimization before may create bit-casts which
18958   // will be type-legalized to complex code sequences.
18959   // We perform this optimization only before the operation legalizer because we
18960   // may introduce illegal operations.
18961   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18962     return SDValue();
18963
18964   unsigned NumInScalars = N->getNumOperands();
18965   SDLoc DL(N);
18966   EVT VT = N->getValueType(0);
18967
18968   // Check to see if this is a BUILD_VECTOR of a bunch of values
18969   // which come from any_extend or zero_extend nodes. If so, we can create
18970   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18971   // optimizations. We do not handle sign-extend because we can't fill the sign
18972   // using shuffles.
18973   EVT SourceType = MVT::Other;
18974   bool AllAnyExt = true;
18975
18976   for (unsigned i = 0; i != NumInScalars; ++i) {
18977     SDValue In = N->getOperand(i);
18978     // Ignore undef inputs.
18979     if (In.isUndef()) continue;
18980
18981     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
18982     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18983
18984     // Abort if the element is not an extension.
18985     if (!ZeroExt && !AnyExt) {
18986       SourceType = MVT::Other;
18987       break;
18988     }
18989
18990     // The input is a ZeroExt or AnyExt. Check the original type.
18991     EVT InTy = In.getOperand(0).getValueType();
18992
18993     // Check that all of the widened source types are the same.
18994     if (SourceType == MVT::Other)
18995       // First time.
18996       SourceType = InTy;
18997     else if (InTy != SourceType) {
18998       // Multiple income types. Abort.
18999       SourceType = MVT::Other;
19000       break;
19001     }
19002
19003     // Check if all of the extends are ANY_EXTENDs.
19004     AllAnyExt &= AnyExt;
19005   }
19006
19007   // In order to have valid types, all of the inputs must be extended from the
19008   // same source type and all of the inputs must be any or zero extend.
19009   // Scalar sizes must be a power of two.
19010   EVT OutScalarTy = VT.getScalarType();
19011   bool ValidTypes = SourceType != MVT::Other &&
19012                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19013                  isPowerOf2_32(SourceType.getSizeInBits());
19014
19015   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19016   // turn into a single shuffle instruction.
19017   if (!ValidTypes)
19018     return SDValue();
19019
19020   // If we already have a splat buildvector, then don't fold it if it means
19021   // introducing zeros.
19022   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19023     return SDValue();
19024
19025   bool isLE = DAG.getDataLayout().isLittleEndian();
19026   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19027   assert(ElemRatio > 1 && "Invalid element size ratio");
19028   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19029                                DAG.getConstant(0, DL, SourceType);
19030
19031   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19032   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19033
19034   // Populate the new build_vector
19035   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19036     SDValue Cast = N->getOperand(i);
19037     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19038             Cast.getOpcode() == ISD::ZERO_EXTEND ||
19039             Cast.isUndef()) && "Invalid cast opcode");
19040     SDValue In;
19041     if (Cast.isUndef())
19042       In = DAG.getUNDEF(SourceType);
19043     else
19044       In = Cast->getOperand(0);
19045     unsigned Index = isLE ? (i * ElemRatio) :
19046                             (i * ElemRatio + (ElemRatio - 1));
19047
19048     assert(Index < Ops.size() && "Invalid index");
19049     Ops[Index] = In;
19050   }
19051
19052   // The type of the new BUILD_VECTOR node.
19053   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19054   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19055          "Invalid vector size");
19056   // Check if the new vector type is legal.
19057   if (!isTypeLegal(VecVT) ||
19058       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19059        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19060     return SDValue();
19061
19062   // Make the new BUILD_VECTOR.
19063   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19064
19065   // The new BUILD_VECTOR node has the potential to be further optimized.
19066   AddToWorklist(BV.getNode());
19067   // Bitcast to the desired type.
19068   return DAG.getBitcast(VT, BV);
19069 }
19070
19071 // Simplify (build_vec (trunc $1)
19072 //                     (trunc (srl $1 half-width))
19073 //                     (trunc (srl $1 (2 * half-width))) …)
19074 // to (bitcast $1)
19075 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19076   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19077
19078   // Only for little endian
19079   if (!DAG.getDataLayout().isLittleEndian())
19080     return SDValue();
19081
19082   SDLoc DL(N);
19083   EVT VT = N->getValueType(0);
19084   EVT OutScalarTy = VT.getScalarType();
19085   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19086
19087   // Only for power of two types to be sure that bitcast works well
19088   if (!isPowerOf2_64(ScalarTypeBitsize))
19089     return SDValue();
19090
19091   unsigned NumInScalars = N->getNumOperands();
19092
19093   // Look through bitcasts
19094   auto PeekThroughBitcast = [](SDValue Op) {
19095     if (Op.getOpcode() == ISD::BITCAST)
19096       return Op.getOperand(0);
19097     return Op;
19098   };
19099
19100   // The source value where all the parts are extracted.
19101   SDValue Src;
19102   for (unsigned i = 0; i != NumInScalars; ++i) {
19103     SDValue In = PeekThroughBitcast(N->getOperand(i));
19104     // Ignore undef inputs.
19105     if (In.isUndef()) continue;
19106
19107     if (In.getOpcode() != ISD::TRUNCATE)
19108       return SDValue();
19109
19110     In = PeekThroughBitcast(In.getOperand(0));
19111
19112     if (In.getOpcode() != ISD::SRL) {
19113       // For now only build_vec without shuffling, handle shifts here in the
19114       // future.
19115       if (i != 0)
19116         return SDValue();
19117
19118       Src = In;
19119     } else {
19120       // In is SRL
19121       SDValue part = PeekThroughBitcast(In.getOperand(0));
19122
19123       if (!Src) {
19124         Src = part;
19125       } else if (Src != part) {
19126         // Vector parts do not stem from the same variable
19127         return SDValue();
19128       }
19129
19130       SDValue ShiftAmtVal = In.getOperand(1);
19131       if (!isa<ConstantSDNode>(ShiftAmtVal))
19132         return SDValue();
19133
19134       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19135
19136       // The extracted value is not extracted at the right position
19137       if (ShiftAmt != i * ScalarTypeBitsize)
19138         return SDValue();
19139     }
19140   }
19141
19142   // Only cast if the size is the same
19143   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19144     return SDValue();
19145
19146   return DAG.getBitcast(VT, Src);
19147 }
19148
19149 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19150                                            ArrayRef<int> VectorMask,
19151                                            SDValue VecIn1, SDValue VecIn2,
19152                                            unsigned LeftIdx, bool DidSplitVec) {
19153   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19154
19155   EVT VT = N->getValueType(0);
19156   EVT InVT1 = VecIn1.getValueType();
19157   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19158
19159   unsigned NumElems = VT.getVectorNumElements();
19160   unsigned ShuffleNumElems = NumElems;
19161
19162   // If we artificially split a vector in two already, then the offsets in the
19163   // operands will all be based off of VecIn1, even those in VecIn2.
19164   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19165
19166   uint64_t VTSize = VT.getFixedSizeInBits();
19167   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19168   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19169
19170   assert(InVT2Size <= InVT1Size &&
19171          "Inputs must be sorted to be in non-increasing vector size order.");
19172
19173   // We can't generate a shuffle node with mismatched input and output types.
19174   // Try to make the types match the type of the output.
19175   if (InVT1 != VT || InVT2 != VT) {
19176     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19177       // If the output vector length is a multiple of both input lengths,
19178       // we can concatenate them and pad the rest with undefs.
19179       unsigned NumConcats = VTSize / InVT1Size;
19180       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19181       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19182       ConcatOps[0] = VecIn1;
19183       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19184       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19185       VecIn2 = SDValue();
19186     } else if (InVT1Size == VTSize * 2) {
19187       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19188         return SDValue();
19189
19190       if (!VecIn2.getNode()) {
19191         // If we only have one input vector, and it's twice the size of the
19192         // output, split it in two.
19193         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19194                              DAG.getVectorIdxConstant(NumElems, DL));
19195         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19196         // Since we now have shorter input vectors, adjust the offset of the
19197         // second vector's start.
19198         Vec2Offset = NumElems;
19199       } else {
19200         assert(InVT2Size <= InVT1Size &&
19201                "Second input is not going to be larger than the first one.");
19202
19203         // VecIn1 is wider than the output, and we have another, possibly
19204         // smaller input. Pad the smaller input with undefs, shuffle at the
19205         // input vector width, and extract the output.
19206         // The shuffle type is different than VT, so check legality again.
19207         if (LegalOperations &&
19208             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19209           return SDValue();
19210
19211         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19212         // lower it back into a BUILD_VECTOR. So if the inserted type is
19213         // illegal, don't even try.
19214         if (InVT1 != InVT2) {
19215           if (!TLI.isTypeLegal(InVT2))
19216             return SDValue();
19217           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19218                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19219         }
19220         ShuffleNumElems = NumElems * 2;
19221       }
19222     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19223       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19224       ConcatOps[0] = VecIn2;
19225       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19226     } else {
19227       // TODO: Support cases where the length mismatch isn't exactly by a
19228       // factor of 2.
19229       // TODO: Move this check upwards, so that if we have bad type
19230       // mismatches, we don't create any DAG nodes.
19231       return SDValue();
19232     }
19233   }
19234
19235   // Initialize mask to undef.
19236   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19237
19238   // Only need to run up to the number of elements actually used, not the
19239   // total number of elements in the shuffle - if we are shuffling a wider
19240   // vector, the high lanes should be set to undef.
19241   for (unsigned i = 0; i != NumElems; ++i) {
19242     if (VectorMask[i] <= 0)
19243       continue;
19244
19245     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19246     if (VectorMask[i] == (int)LeftIdx) {
19247       Mask[i] = ExtIndex;
19248     } else if (VectorMask[i] == (int)LeftIdx + 1) {
19249       Mask[i] = Vec2Offset + ExtIndex;
19250     }
19251   }
19252
19253   // The type the input vectors may have changed above.
19254   InVT1 = VecIn1.getValueType();
19255
19256   // If we already have a VecIn2, it should have the same type as VecIn1.
19257   // If we don't, get an undef/zero vector of the appropriate type.
19258   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19259   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19260
19261   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19262   if (ShuffleNumElems > NumElems)
19263     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19264
19265   return Shuffle;
19266 }
19267
19268 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19269   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19270
19271   // First, determine where the build vector is not undef.
19272   // TODO: We could extend this to handle zero elements as well as undefs.
19273   int NumBVOps = BV->getNumOperands();
19274   int ZextElt = -1;
19275   for (int i = 0; i != NumBVOps; ++i) {
19276     SDValue Op = BV->getOperand(i);
19277     if (Op.isUndef())
19278       continue;
19279     if (ZextElt == -1)
19280       ZextElt = i;
19281     else
19282       return SDValue();
19283   }
19284   // Bail out if there's no non-undef element.
19285   if (ZextElt == -1)
19286     return SDValue();
19287
19288   // The build vector contains some number of undef elements and exactly
19289   // one other element. That other element must be a zero-extended scalar
19290   // extracted from a vector at a constant index to turn this into a shuffle.
19291   // Also, require that the build vector does not implicitly truncate/extend
19292   // its elements.
19293   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19294   EVT VT = BV->getValueType(0);
19295   SDValue Zext = BV->getOperand(ZextElt);
19296   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19297       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19298       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19299       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19300     return SDValue();
19301
19302   // The zero-extend must be a multiple of the source size, and we must be
19303   // building a vector of the same size as the source of the extract element.
19304   SDValue Extract = Zext.getOperand(0);
19305   unsigned DestSize = Zext.getValueSizeInBits();
19306   unsigned SrcSize = Extract.getValueSizeInBits();
19307   if (DestSize % SrcSize != 0 ||
19308       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19309     return SDValue();
19310
19311   // Create a shuffle mask that will combine the extracted element with zeros
19312   // and undefs.
19313   int ZextRatio = DestSize / SrcSize;
19314   int NumMaskElts = NumBVOps * ZextRatio;
19315   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19316   for (int i = 0; i != NumMaskElts; ++i) {
19317     if (i / ZextRatio == ZextElt) {
19318       // The low bits of the (potentially translated) extracted element map to
19319       // the source vector. The high bits map to zero. We will use a zero vector
19320       // as the 2nd source operand of the shuffle, so use the 1st element of
19321       // that vector (mask value is number-of-elements) for the high bits.
19322       if (i % ZextRatio == 0)
19323         ShufMask[i] = Extract.getConstantOperandVal(1);
19324       else
19325         ShufMask[i] = NumMaskElts;
19326     }
19327
19328     // Undef elements of the build vector remain undef because we initialize
19329     // the shuffle mask with -1.
19330   }
19331
19332   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19333   // bitcast (shuffle V, ZeroVec, VectorMask)
19334   SDLoc DL(BV);
19335   EVT VecVT = Extract.getOperand(0).getValueType();
19336   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19337   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19338   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19339                                              ZeroVec, ShufMask, DAG);
19340   if (!Shuf)
19341     return SDValue();
19342   return DAG.getBitcast(VT, Shuf);
19343 }
19344
// FIXME: promote to STLExtras.
// Return the position, counted from Range.begin(), of the first match for Val
// located by find(Range, Val), or -1 (converted to the iterator difference
// type) if there is no match.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  auto Pos = find(Range, Val);
  using IndexTy = decltype(std::distance(Range.begin(), Pos));
  return Pos != Range.end() ? std::distance(Range.begin(), Pos)
                            : static_cast<IndexTy>(-1);
}
19353
19354 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19355 // operations. If the types of the vectors we're extracting from allow it,
19356 // turn this into a vector_shuffle node.
19357 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19358   SDLoc DL(N);
19359   EVT VT = N->getValueType(0);
19360
19361   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19362   if (!isTypeLegal(VT))
19363     return SDValue();
19364
19365   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19366     return V;
19367
19368   // May only combine to shuffle after legalize if shuffle is legal.
19369   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19370     return SDValue();
19371
19372   bool UsesZeroVector = false;
19373   unsigned NumElems = N->getNumOperands();
19374
19375   // Record, for each element of the newly built vector, which input vector
19376   // that element comes from. -1 stands for undef, 0 for the zero vector,
19377   // and positive values for the input vectors.
19378   // VectorMask maps each element to its vector number, and VecIn maps vector
19379   // numbers to their initial SDValues.
19380
19381   SmallVector<int, 8> VectorMask(NumElems, -1);
19382   SmallVector<SDValue, 8> VecIn;
19383   VecIn.push_back(SDValue());
19384
19385   for (unsigned i = 0; i != NumElems; ++i) {
19386     SDValue Op = N->getOperand(i);
19387
19388     if (Op.isUndef())
19389       continue;
19390
19391     // See if we can use a blend with a zero vector.
19392     // TODO: Should we generalize this to a blend with an arbitrary constant
19393     // vector?
19394     if (isNullConstant(Op) || isNullFPConstant(Op)) {
19395       UsesZeroVector = true;
19396       VectorMask[i] = 0;
19397       continue;
19398     }
19399
19400     // Not an undef or zero. If the input is something other than an
19401     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19402     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19403         !isa<ConstantSDNode>(Op.getOperand(1)))
19404       return SDValue();
19405     SDValue ExtractedFromVec = Op.getOperand(0);
19406
19407     if (ExtractedFromVec.getValueType().isScalableVector())
19408       return SDValue();
19409
19410     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19411     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19412       return SDValue();
19413
19414     // All inputs must have the same element type as the output.
19415     if (VT.getVectorElementType() !=
19416         ExtractedFromVec.getValueType().getVectorElementType())
19417       return SDValue();
19418
19419     // Have we seen this input vector before?
19420     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
19421     // a map back from SDValues to numbers isn't worth it.
19422     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
19423     if (Idx == -1) { // A new source vector?
19424       Idx = VecIn.size();
19425       VecIn.push_back(ExtractedFromVec);
19426     }
19427
19428     VectorMask[i] = Idx;
19429   }
19430
19431   // If we didn't find at least one input vector, bail out.
19432   if (VecIn.size() < 2)
19433     return SDValue();
19434
19435   // If all the Operands of BUILD_VECTOR extract from same
19436   // vector, then split the vector efficiently based on the maximum
19437   // vector access index and adjust the VectorMask and
19438   // VecIn accordingly.
19439   bool DidSplitVec = false;
19440   if (VecIn.size() == 2) {
19441     unsigned MaxIndex = 0;
19442     unsigned NearestPow2 = 0;
19443     SDValue Vec = VecIn.back();
19444     EVT InVT = Vec.getValueType();
19445     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19446
19447     for (unsigned i = 0; i < NumElems; i++) {
19448       if (VectorMask[i] <= 0)
19449         continue;
19450       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19451       IndexVec[i] = Index;
19452       MaxIndex = std::max(MaxIndex, Index);
19453     }
19454
19455     NearestPow2 = PowerOf2Ceil(MaxIndex);
19456     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19457         NumElems * 2 < NearestPow2) {
19458       unsigned SplitSize = NearestPow2 / 2;
19459       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19460                                      InVT.getVectorElementType(), SplitSize);
19461       if (TLI.isTypeLegal(SplitVT) &&
19462           SplitSize + SplitVT.getVectorNumElements() <=
19463               InVT.getVectorNumElements()) {
19464         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19465                                      DAG.getVectorIdxConstant(SplitSize, DL));
19466         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19467                                      DAG.getVectorIdxConstant(0, DL));
19468         VecIn.pop_back();
19469         VecIn.push_back(VecIn1);
19470         VecIn.push_back(VecIn2);
19471         DidSplitVec = true;
19472
19473         for (unsigned i = 0; i < NumElems; i++) {
19474           if (VectorMask[i] <= 0)
19475             continue;
19476           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19477         }
19478       }
19479     }
19480   }
19481
19482   // Sort input vectors by decreasing vector element count,
19483   // while preserving the relative order of equally-sized vectors.
19484   // Note that we keep the first "implicit zero vector as-is.
19485   SmallVector<SDValue, 8> SortedVecIn(VecIn);
19486   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
19487                     [](const SDValue &a, const SDValue &b) {
19488                       return a.getValueType().getVectorNumElements() >
19489                              b.getValueType().getVectorNumElements();
19490                     });
19491
19492   // We now also need to rebuild the VectorMask, because it referenced element
19493   // order in VecIn, and we just sorted them.
19494   for (int &SourceVectorIndex : VectorMask) {
19495     if (SourceVectorIndex <= 0)
19496       continue;
19497     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
19498     assert(Idx > 0 && Idx < SortedVecIn.size() &&
19499            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
19500     SourceVectorIndex = Idx;
19501   }
19502
19503   VecIn = std::move(SortedVecIn);
19504
19505   // TODO: Should this fire if some of the input vectors has illegal type (like
19506   // it does now), or should we let legalization run its course first?
19507
19508   // Shuffle phase:
19509   // Take pairs of vectors, and shuffle them so that the result has elements
19510   // from these vectors in the correct places.
19511   // For example, given:
19512   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19513   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19514   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19515   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19516   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19517   // We will generate:
19518   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19519   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19520   SmallVector<SDValue, 4> Shuffles;
19521   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19522     unsigned LeftIdx = 2 * In + 1;
19523     SDValue VecLeft = VecIn[LeftIdx];
19524     SDValue VecRight =
19525         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19526
19527     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19528                                                 VecRight, LeftIdx, DidSplitVec))
19529       Shuffles.push_back(Shuffle);
19530     else
19531       return SDValue();
19532   }
19533
19534   // If we need the zero vector as an "ingredient" in the blend tree, add it
19535   // to the list of shuffles.
19536   if (UsesZeroVector)
19537     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19538                                       : DAG.getConstantFP(0.0, DL, VT));
19539
19540   // If we only have one shuffle, we're done.
19541   if (Shuffles.size() == 1)
19542     return Shuffles[0];
19543
19544   // Update the vector mask to point to the post-shuffle vectors.
19545   for (int &Vec : VectorMask)
19546     if (Vec == 0)
19547       Vec = Shuffles.size() - 1;
19548     else
19549       Vec = (Vec - 1) / 2;
19550
19551   // More than one shuffle. Generate a binary tree of blends, e.g. if from
19552   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19553   // generate:
19554   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19555   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19556   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19557   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19558   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19559   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19560   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19561
19562   // Make sure the initial size of the shuffle list is even.
19563   if (Shuffles.size() % 2)
19564     Shuffles.push_back(DAG.getUNDEF(VT));
19565
19566   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19567     if (CurSize % 2) {
19568       Shuffles[CurSize] = DAG.getUNDEF(VT);
19569       CurSize++;
19570     }
19571     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19572       int Left = 2 * In;
19573       int Right = 2 * In + 1;
19574       SmallVector<int, 8> Mask(NumElems, -1);
19575       for (unsigned i = 0; i != NumElems; ++i) {
19576         if (VectorMask[i] == Left) {
19577           Mask[i] = i;
19578           VectorMask[i] = In;
19579         } else if (VectorMask[i] == Right) {
19580           Mask[i] = i + NumElems;
19581           VectorMask[i] = In;
19582         }
19583       }
19584
19585       Shuffles[In] =
19586           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19587     }
19588   }
19589   return Shuffles[0];
19590 }
19591
19592 // Try to turn a build vector of zero extends of extract vector elts into a
19593 // a vector zero extend and possibly an extract subvector.
19594 // TODO: Support sign extend?
19595 // TODO: Allow undef elements?
19596 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19597   if (LegalOperations)
19598     return SDValue();
19599
19600   EVT VT = N->getValueType(0);
19601
19602   bool FoundZeroExtend = false;
19603   SDValue Op0 = N->getOperand(0);
19604   auto checkElem = [&](SDValue Op) -> int64_t {
19605     unsigned Opc = Op.getOpcode();
19606     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19607     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19608         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19609         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19610       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19611         return C->getZExtValue();
19612     return -1;
19613   };
19614
19615   // Make sure the first element matches
19616   // (zext (extract_vector_elt X, C))
19617   int64_t Offset = checkElem(Op0);
19618   if (Offset < 0)
19619     return SDValue();
19620
19621   unsigned NumElems = N->getNumOperands();
19622   SDValue In = Op0.getOperand(0).getOperand(0);
19623   EVT InSVT = In.getValueType().getScalarType();
19624   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19625
19626   // Don't create an illegal input type after type legalization.
19627   if (LegalTypes && !TLI.isTypeLegal(InVT))
19628     return SDValue();
19629
19630   // Ensure all the elements come from the same vector and are adjacent.
19631   for (unsigned i = 1; i != NumElems; ++i) {
19632     if ((Offset + i) != checkElem(N->getOperand(i)))
19633       return SDValue();
19634   }
19635
19636   SDLoc DL(N);
19637   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19638                    Op0.getOperand(0).getOperand(1));
19639   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19640                      VT, In);
19641 }
19642
19643 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19644   EVT VT = N->getValueType(0);
19645
19646   // A vector built entirely of undefs is undef.
19647   if (ISD::allOperandsUndef(N))
19648     return DAG.getUNDEF(VT);
19649
19650   // If this is a splat of a bitcast from another vector, change to a
19651   // concat_vector.
19652   // For example:
19653   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19654   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19655   //
19656   // If X is a build_vector itself, the concat can become a larger build_vector.
19657   // TODO: Maybe this is useful for non-splat too?
19658   if (!LegalOperations) {
19659     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19660       Splat = peekThroughBitcasts(Splat);
19661       EVT SrcVT = Splat.getValueType();
19662       if (SrcVT.isVector()) {
19663         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19664         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19665                                      SrcVT.getVectorElementType(), NumElts);
19666         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19667           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19668           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19669                                        NewVT, Ops);
19670           return DAG.getBitcast(VT, Concat);
19671         }
19672       }
19673     }
19674   }
19675
19676   // Check if we can express BUILD VECTOR via subvector extract.
19677   if (!LegalTypes && (N->getNumOperands() > 1)) {
19678     SDValue Op0 = N->getOperand(0);
19679     auto checkElem = [&](SDValue Op) -> uint64_t {
19680       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19681           (Op0.getOperand(0) == Op.getOperand(0)))
19682         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19683           return CNode->getZExtValue();
19684       return -1;
19685     };
19686
19687     int Offset = checkElem(Op0);
19688     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19689       if (Offset + i != checkElem(N->getOperand(i))) {
19690         Offset = -1;
19691         break;
19692       }
19693     }
19694
19695     if ((Offset == 0) &&
19696         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19697       return Op0.getOperand(0);
19698     if ((Offset != -1) &&
19699         ((Offset % N->getValueType(0).getVectorNumElements()) ==
19700          0)) // IDX must be multiple of output size.
19701       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19702                          Op0.getOperand(0), Op0.getOperand(1));
19703   }
19704
19705   if (SDValue V = convertBuildVecZextToZext(N))
19706     return V;
19707
19708   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19709     return V;
19710
19711   if (SDValue V = reduceBuildVecTruncToBitCast(N))
19712     return V;
19713
19714   if (SDValue V = reduceBuildVecToShuffle(N))
19715     return V;
19716
19717   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19718   // Do this late as some of the above may replace the splat.
19719   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19720     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19721       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19722       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19723     }
19724
19725   return SDValue();
19726 }
19727
19728 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19729   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19730   EVT OpVT = N->getOperand(0).getValueType();
19731
19732   // If the operands are legal vectors, leave them alone.
19733   if (TLI.isTypeLegal(OpVT))
19734     return SDValue();
19735
19736   SDLoc DL(N);
19737   EVT VT = N->getValueType(0);
19738   SmallVector<SDValue, 8> Ops;
19739
19740   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19741   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19742
19743   // Keep track of what we encounter.
19744   bool AnyInteger = false;
19745   bool AnyFP = false;
19746   for (const SDValue &Op : N->ops()) {
19747     if (ISD::BITCAST == Op.getOpcode() &&
19748         !Op.getOperand(0).getValueType().isVector())
19749       Ops.push_back(Op.getOperand(0));
19750     else if (ISD::UNDEF == Op.getOpcode())
19751       Ops.push_back(ScalarUndef);
19752     else
19753       return SDValue();
19754
19755     // Note whether we encounter an integer or floating point scalar.
19756     // If it's neither, bail out, it could be something weird like x86mmx.
19757     EVT LastOpVT = Ops.back().getValueType();
19758     if (LastOpVT.isFloatingPoint())
19759       AnyFP = true;
19760     else if (LastOpVT.isInteger())
19761       AnyInteger = true;
19762     else
19763       return SDValue();
19764   }
19765
19766   // If any of the operands is a floating point scalar bitcast to a vector,
19767   // use floating point types throughout, and bitcast everything.
19768   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19769   if (AnyFP) {
19770     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19771     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19772     if (AnyInteger) {
19773       for (SDValue &Op : Ops) {
19774         if (Op.getValueType() == SVT)
19775           continue;
19776         if (Op.isUndef())
19777           Op = ScalarUndef;
19778         else
19779           Op = DAG.getBitcast(SVT, Op);
19780       }
19781     }
19782   }
19783
19784   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19785                                VT.getSizeInBits() / SVT.getSizeInBits());
19786   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19787 }
19788
19789 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19790 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19791 // most two distinct vectors the same size as the result, attempt to turn this
19792 // into a legal shuffle.
19793 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19794   EVT VT = N->getValueType(0);
19795   EVT OpVT = N->getOperand(0).getValueType();
19796
19797   // We currently can't generate an appropriate shuffle for a scalable vector.
19798   if (VT.isScalableVector())
19799     return SDValue();
19800
19801   int NumElts = VT.getVectorNumElements();
19802   int NumOpElts = OpVT.getVectorNumElements();
19803
19804   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19805   SmallVector<int, 8> Mask;
19806
19807   for (SDValue Op : N->ops()) {
19808     Op = peekThroughBitcasts(Op);
19809
19810     // UNDEF nodes convert to UNDEF shuffle mask values.
19811     if (Op.isUndef()) {
19812       Mask.append((unsigned)NumOpElts, -1);
19813       continue;
19814     }
19815
19816     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19817       return SDValue();
19818
19819     // What vector are we extracting the subvector from and at what index?
19820     SDValue ExtVec = Op.getOperand(0);
19821     int ExtIdx = Op.getConstantOperandVal(1);
19822
19823     // We want the EVT of the original extraction to correctly scale the
19824     // extraction index.
19825     EVT ExtVT = ExtVec.getValueType();
19826     ExtVec = peekThroughBitcasts(ExtVec);
19827
19828     // UNDEF nodes convert to UNDEF shuffle mask values.
19829     if (ExtVec.isUndef()) {
19830       Mask.append((unsigned)NumOpElts, -1);
19831       continue;
19832     }
19833
19834     // Ensure that we are extracting a subvector from a vector the same
19835     // size as the result.
19836     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19837       return SDValue();
19838
19839     // Scale the subvector index to account for any bitcast.
19840     int NumExtElts = ExtVT.getVectorNumElements();
19841     if (0 == (NumExtElts % NumElts))
19842       ExtIdx /= (NumExtElts / NumElts);
19843     else if (0 == (NumElts % NumExtElts))
19844       ExtIdx *= (NumElts / NumExtElts);
19845     else
19846       return SDValue();
19847
19848     // At most we can reference 2 inputs in the final shuffle.
19849     if (SV0.isUndef() || SV0 == ExtVec) {
19850       SV0 = ExtVec;
19851       for (int i = 0; i != NumOpElts; ++i)
19852         Mask.push_back(i + ExtIdx);
19853     } else if (SV1.isUndef() || SV1 == ExtVec) {
19854       SV1 = ExtVec;
19855       for (int i = 0; i != NumOpElts; ++i)
19856         Mask.push_back(i + ExtIdx + NumElts);
19857     } else {
19858       return SDValue();
19859     }
19860   }
19861
19862   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19863   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19864                                      DAG.getBitcast(VT, SV1), Mask, DAG);
19865 }
19866
19867 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19868   unsigned CastOpcode = N->getOperand(0).getOpcode();
19869   switch (CastOpcode) {
19870   case ISD::SINT_TO_FP:
19871   case ISD::UINT_TO_FP:
19872   case ISD::FP_TO_SINT:
19873   case ISD::FP_TO_UINT:
19874     // TODO: Allow more opcodes?
19875     //  case ISD::BITCAST:
19876     //  case ISD::TRUNCATE:
19877     //  case ISD::ZERO_EXTEND:
19878     //  case ISD::SIGN_EXTEND:
19879     //  case ISD::FP_EXTEND:
19880     break;
19881   default:
19882     return SDValue();
19883   }
19884
19885   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19886   if (!SrcVT.isVector())
19887     return SDValue();
19888
19889   // All operands of the concat must be the same kind of cast from the same
19890   // source type.
19891   SmallVector<SDValue, 4> SrcOps;
19892   for (SDValue Op : N->ops()) {
19893     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19894         Op.getOperand(0).getValueType() != SrcVT)
19895       return SDValue();
19896     SrcOps.push_back(Op.getOperand(0));
19897   }
19898
19899   // The wider cast must be supported by the target. This is unusual because
19900   // the operation support type parameter depends on the opcode. In addition,
19901   // check the other type in the cast to make sure this is really legal.
19902   EVT VT = N->getValueType(0);
19903   EVT SrcEltVT = SrcVT.getVectorElementType();
19904   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19905   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19906   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19907   switch (CastOpcode) {
19908   case ISD::SINT_TO_FP:
19909   case ISD::UINT_TO_FP:
19910     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19911         !TLI.isTypeLegal(VT))
19912       return SDValue();
19913     break;
19914   case ISD::FP_TO_SINT:
19915   case ISD::FP_TO_UINT:
19916     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19917         !TLI.isTypeLegal(ConcatSrcVT))
19918       return SDValue();
19919     break;
19920   default:
19921     llvm_unreachable("Unexpected cast opcode");
19922   }
19923
19924   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19925   SDLoc DL(N);
19926   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19927   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19928 }
19929
19930 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19931   // If we only have one input vector, we don't need to do any concatenation.
19932   if (N->getNumOperands() == 1)
19933     return N->getOperand(0);
19934
19935   // Check if all of the operands are undefs.
19936   EVT VT = N->getValueType(0);
19937   if (ISD::allOperandsUndef(N))
19938     return DAG.getUNDEF(VT);
19939
19940   // Optimize concat_vectors where all but the first of the vectors are undef.
19941   if (all_of(drop_begin(N->ops()),
19942              [](const SDValue &Op) { return Op.isUndef(); })) {
19943     SDValue In = N->getOperand(0);
19944     assert(In.getValueType().isVector() && "Must concat vectors");
19945
19946     // If the input is a concat_vectors, just make a larger concat by padding
19947     // with smaller undefs.
19948     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
19949       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
19950       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
19951       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
19952       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19953     }
19954
19955     SDValue Scalar = peekThroughOneUseBitcasts(In);
19956
19957     // concat_vectors(scalar_to_vector(scalar), undef) ->
19958     //     scalar_to_vector(scalar)
19959     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19960          Scalar.hasOneUse()) {
19961       EVT SVT = Scalar.getValueType().getVectorElementType();
19962       if (SVT == Scalar.getOperand(0).getValueType())
19963         Scalar = Scalar.getOperand(0);
19964     }
19965
19966     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
19967     if (!Scalar.getValueType().isVector()) {
19968       // If the bitcast type isn't legal, it might be a trunc of a legal type;
19969       // look through the trunc so we can still do the transform:
19970       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
19971       if (Scalar->getOpcode() == ISD::TRUNCATE &&
19972           !TLI.isTypeLegal(Scalar.getValueType()) &&
19973           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
19974         Scalar = Scalar->getOperand(0);
19975
19976       EVT SclTy = Scalar.getValueType();
19977
19978       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
19979         return SDValue();
19980
19981       // Bail out if the vector size is not a multiple of the scalar size.
19982       if (VT.getSizeInBits() % SclTy.getSizeInBits())
19983         return SDValue();
19984
19985       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
19986       if (VNTNumElms < 2)
19987         return SDValue();
19988
19989       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
19990       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
19991         return SDValue();
19992
19993       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
19994       return DAG.getBitcast(VT, Res);
19995     }
19996   }
19997
19998   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19999   // We have already tested above for an UNDEF only concatenation.
20000   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20001   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20002   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20003     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20004   };
20005   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20006     SmallVector<SDValue, 8> Opnds;
20007     EVT SVT = VT.getScalarType();
20008
20009     EVT MinVT = SVT;
20010     if (!SVT.isFloatingPoint()) {
20011       // If BUILD_VECTOR are from built from integer, they may have different
20012       // operand types. Get the smallest type and truncate all operands to it.
20013       bool FoundMinVT = false;
20014       for (const SDValue &Op : N->ops())
20015         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20016           EVT OpSVT = Op.getOperand(0).getValueType();
20017           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20018           FoundMinVT = true;
20019         }
20020       assert(FoundMinVT && "Concat vector type mismatch");
20021     }
20022
20023     for (const SDValue &Op : N->ops()) {
20024       EVT OpVT = Op.getValueType();
20025       unsigned NumElts = OpVT.getVectorNumElements();
20026
20027       if (ISD::UNDEF == Op.getOpcode())
20028         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20029
20030       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20031         if (SVT.isFloatingPoint()) {
20032           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20033           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20034         } else {
20035           for (unsigned i = 0; i != NumElts; ++i)
20036             Opnds.push_back(
20037                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20038         }
20039       }
20040     }
20041
20042     assert(VT.getVectorNumElements() == Opnds.size() &&
20043            "Concat vector type mismatch");
20044     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20045   }
20046
20047   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20048   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20049     return V;
20050
20051   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20052   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
20053     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20054       return V;
20055
20056   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20057     return V;
20058
20059   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20060   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20061   // operands and look for a CONCAT operations that place the incoming vectors
20062   // at the exact same location.
20063   //
20064   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20065   SDValue SingleSource = SDValue();
20066   unsigned PartNumElem =
20067       N->getOperand(0).getValueType().getVectorMinNumElements();
20068
20069   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20070     SDValue Op = N->getOperand(i);
20071
20072     if (Op.isUndef())
20073       continue;
20074
20075     // Check if this is the identity extract:
20076     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20077       return SDValue();
20078
20079     // Find the single incoming vector for the extract_subvector.
20080     if (SingleSource.getNode()) {
20081       if (Op.getOperand(0) != SingleSource)
20082         return SDValue();
20083     } else {
20084       SingleSource = Op.getOperand(0);
20085
20086       // Check the source type is the same as the type of the result.
20087       // If not, this concat may extend the vector, so we can not
20088       // optimize it away.
20089       if (SingleSource.getValueType() != N->getValueType(0))
20090         return SDValue();
20091     }
20092
20093     // Check that we are reading from the identity index.
20094     unsigned IdentityIndex = i * PartNumElem;
20095     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20096       return SDValue();
20097   }
20098
20099   if (SingleSource.getNode())
20100     return SingleSource;
20101
20102   return SDValue();
20103 }
20104
20105 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20106 // if the subvector can be sourced for free.
20107 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20108   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20109       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20110     return V.getOperand(1);
20111   }
20112   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20113   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20114       V.getOperand(0).getValueType() == SubVT &&
20115       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20116     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20117     return V.getOperand(SubIdx);
20118   }
20119   return SDValue();
20120 }
20121
20122 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20123                                               SelectionDAG &DAG,
20124                                               bool LegalOperations) {
20125   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20126   SDValue BinOp = Extract->getOperand(0);
20127   unsigned BinOpcode = BinOp.getOpcode();
20128   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20129     return SDValue();
20130
20131   EVT VecVT = BinOp.getValueType();
20132   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20133   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20134     return SDValue();
20135
20136   SDValue Index = Extract->getOperand(1);
20137   EVT SubVT = Extract->getValueType(0);
20138   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20139     return SDValue();
20140
20141   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20142   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20143
20144   // TODO: We could handle the case where only 1 operand is being inserted by
20145   //       creating an extract of the other operand, but that requires checking
20146   //       number of uses and/or costs.
20147   if (!Sub0 || !Sub1)
20148     return SDValue();
20149
20150   // We are inserting both operands of the wide binop only to extract back
20151   // to the narrow vector size. Eliminate all of the insert/extract:
20152   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20153   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20154                      BinOp->getFlags());
20155 }
20156
20157 /// If we are extracting a subvector produced by a wide binary operator try
20158 /// to use a narrow binary operator and/or avoid concatenation and extraction.
      ///
      /// \p Extract is the extracting node: operand 0 is the wide source vector and
      /// operand 1 is the extraction index. \p LegalOperations is only forwarded to
      /// narrowInsertExtractVectorBinOp(), which is tried first.
      ///
      /// \returns the replacement value, or an empty SDValue if no transform
      /// applies.
20159 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20160                                           bool LegalOperations) {
20161   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20162   // some of these bailouts with other transforms.
20163
20164   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20165     return V;
20166
20167   // The extract index must be a constant, so we can map it to a concat operand.
20168   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20169   if (!ExtractIndexC)
20170     return SDValue();
20171
20172   // We are looking for an optionally bitcasted wide vector binary operator
20173   // feeding an extract subvector.
        // Nodes that produce more than one value are rejected as well.
20174   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20175   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20176   unsigned BOpcode = BinOp.getOpcode();
20177   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20178     return SDValue();
20179
20180   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20181   // reduced to the unary fneg when it is visited, and we probably want to deal
20182   // with fneg in a target-specific way.
20183   if (BOpcode == ISD::FSUB) {
20184     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20185     if (C && C->getValueAPF().isNegZero())
20186       return SDValue();
20187   }
20188
20189   // The binop must be a vector type, so we can extract some fraction of it.
20190   EVT WideBVT = BinOp.getValueType();
20191   // The optimisations below currently assume we are dealing with fixed length
20192   // vectors. It is possible to add support for scalable vectors, but at the
20193   // moment we've done no analysis to prove whether they are profitable or not.
20194   if (!WideBVT.isFixedLengthVector())
20195     return SDValue();
20196
20197   EVT VT = Extract->getValueType(0);
20198   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20199   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20200          "Extract index is not a multiple of the vector length.");
20201
20202   // Bail out if this is not a proper multiple width extraction.
20203   unsigned WideWidth = WideBVT.getSizeInBits();
20204   unsigned NarrowWidth = VT.getSizeInBits();
20205   if (WideWidth % NarrowWidth != 0)
20206     return SDValue();
20207
20208   // Bail out if we are extracting a fraction of a single operation. This can
20209   // occur because we potentially looked through a bitcast of the binop.
20210   unsigned NarrowingRatio = WideWidth / NarrowWidth;
20211   unsigned WideNumElts = WideBVT.getVectorNumElements();
20212   if (WideNumElts % NarrowingRatio != 0)
20213     return SDValue();
20214
20215   // Bail out if the target does not support a narrower version of the binop.
        // NarrowBVT keeps the binop's scalar type; only the element count shrinks.
20216   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20217                                    WideNumElts / NarrowingRatio);
20218   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20219     return SDValue();
20220
20221   // If extraction is cheap, we don't need to look at the binop operands
20222   // for concat ops. The narrow binop alone makes this transform profitable.
20223   // We can't just reuse the original extract index operand because we may have
20224   // bitcasted.
        // ConcatOpNum is the index of the extracted chunk counted in units of the
        // extract's result width; ExtBOIdx is the same position expressed in
        // elements of the binop's own type.
20225   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20226   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
        // Both the binop and the node feeding the extract (which may be a bitcast
        // of the binop) must have a single use for this path to be taken.
20227   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20228       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20229     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20230     SDLoc DL(Extract);
20231     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20232     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20233                             BinOp.getOperand(0), NewExtIndex);
20234     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20235                             BinOp.getOperand(1), NewExtIndex);
20236     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20237                                       BinOp.getNode()->getFlags());
20238     return DAG.getBitcast(VT, NarrowBinOp);
20239   }
20240
20241   // Only handle the case where we are doubling and then halving. A larger ratio
20242   // may require more than two narrow binops to replace the wide binop.
20243   if (NarrowingRatio != 2)
20244     return SDValue();
20245
20246   // TODO: The motivating case for this transform is an x86 AVX1 target. That
20247   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20248   // flavors, but no other 256-bit integer support. This could be extended to
20249   // handle any binop, but that may require fixing/adding other folds to avoid
20250   // codegen regressions.
20251   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20252     return SDValue();
20253
20254   // We need at least one concatenation operation of a binop operand to make
20255   // this transform worthwhile. The concat must double the input vector sizes.
        // GetSubVector returns the ConcatOpNum'th operand of a 2-operand concat, or
        // an empty SDValue for anything else. NOTE(review): ConcatOpNum is expected
        // to be 0 or 1 here because NarrowingRatio == 2, assuming the extract is in
        // range -- confirm.
20256   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20257     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20258       return V.getOperand(ConcatOpNum);
20259     return SDValue();
20260   };
20261   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20262   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20263
20264   if (SubVecL || SubVecR) {
20265     // If a binop operand was not the result of a concat, we must extract a
20266     // half-sized operand for our new narrow binop:
20267     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20268     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20269     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20270     SDLoc DL(Extract);
20271     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20272     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20273                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20274                                       BinOp.getOperand(0), IndexC);
20275
20276     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20277                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20278                                       BinOp.getOperand(1), IndexC);
20279
          // Unlike the cheap-extract path above, the node flags of the wide binop
          // are not forwarded here; only AND/OR/XOR reach this point.
20280     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20281     return DAG.getBitcast(VT, NarrowBinOp);
20282   }
20283
20284   return SDValue();
20285 }
20286
20287 /// If we are extracting a subvector from a wide vector load, convert to a
20288 /// narrow load to eliminate the extraction:
20289 /// (extract_subvector (load wide vector)) --> (load narrow vector)
      ///
      /// \p Extract is the extracting node: operand 0 must be the load and operand
      /// 1 a constant index for the transform to apply.
      ///
      /// \returns the new narrow load, or an empty SDValue if the transform does
      /// not apply.
20290 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20291   // TODO: Add support for big-endian. The offset calculation must be adjusted.
20292   if (DAG.getDataLayout().isBigEndian())
20293     return SDValue();
20294
        // Require a simple load with a zero extension-type value (presumably a
        // plain, non-extending load -- confirm against ISD::LoadExtType) and a
        // constant extract index.
20295   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20296   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20297   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
20298       !ExtIdx)
20299     return SDValue();
20300
20301   // The narrow load produces the extract's result type.
20302   EVT VT = Extract->getValueType(0);
20303
20304   // We can only create byte sized loads.
20305   if (!VT.isByteSized())
20306     return SDValue();
20307
20308   unsigned Index = ExtIdx->getZExtValue();
20309   unsigned NumElts = VT.getVectorMinNumElements();
20310
20311   // The definition of EXTRACT_SUBVECTOR states that the index must be a
20312   // multiple of the minimum number of elements in the result type.
20313   assert(Index % NumElts == 0 && "The extract subvector index is not a "
20314                                  "multiple of the result's element count");
20315
20316   // It's fine to use TypeSize here as we know the offset will not be negative.
        // The offset is the store size of the result type times the number of
        // whole result-sized chunks that precede the extracted one.
20317   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20318
        // Allow targets to opt-out of narrowing this load.
20319   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20320   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20321     return SDValue();
20322
20323   // The narrow load will be offset from the base address of the old load if
20324   // we are extracting from something besides index 0 (little-endian).
20325   SDLoc DL(Extract);
20326
20327   // TODO: Use "BaseIndexOffset" to make this more effective.
20328   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20329
20330   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20331   MachineFunction &MF = DAG.getMachineFunction();
20332   MachineMemOperand *MMO;
        // For a scalable offset the new memory operand is built from a fresh
        // MachinePointerInfo that carries only the original address space;
        // otherwise it is derived from the old operand at the fixed byte offset.
20333   if (Offset.isScalable()) {
20334     MachinePointerInfo MPI =
20335         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20336     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20337   } else
20338     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20339                                   StoreSize);
20340
        // The new load reuses the old load's chain operand and is then given an
        // equivalent memory ordering to the old load.
20341   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20342   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20343   return NewLd;
20344 }
20345
20346 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
        // Try to simplify the EXTRACT_SUBVECTOR node N (operand 0: source vector,
        // operand 1: constant index). The folds are attempted in this order:
        //   1. extract from undef                --> undef
        //   2. extract from a wide load          --> narrow load
        //   3. ext (ext X, C), 0                 --> ext X, C
        //   4. extract of a vector bitcast       --> bitcast of a rescaled extract
        //   5. extract of concat_vectors         --> concat operand (or part of it)
        //   6. extract of build_vector           --> smaller build_vector / scalar
        //   7. extract of insert_subvector       --> inserted value or narrower
        //                                            extract of the base vector
        //   8. extract of a wide binop           --> narrow binop
        //   9. demanded-elements simplification of the source
        // Returns the replacement value, or an empty SDValue if nothing changed.
20347   EVT NVT = N->getValueType(0);
20348   SDValue V = N->getOperand(0);
20349   uint64_t ExtIdx = N->getConstantOperandVal(1);
20350
20351   // Extract from UNDEF is UNDEF.
20352   if (V.isUndef())
20353     return DAG.getUNDEF(NVT);
20354
20355   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20356     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20357       return NarrowLoad;
20358
20359   // Combine an extract of an extract into a single extract_subvector.
20360   // ext (ext X, C), 0 --> ext X, C
20361   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20362     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20363                                     V.getConstantOperandVal(1)) &&
20364         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20365       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20366                          V.getOperand(1));
20367     }
20368   }
20369
20370   // Try to move vector bitcast after extract_subv by scaling extraction index:
20371   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20372   if (V.getOpcode() == ISD::BITCAST &&
20373       V.getOperand(0).getValueType().isVector() &&
20374       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20375     SDValue SrcOp = V.getOperand(0);
20376     EVT SrcVT = SrcOp.getValueType();
20377     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20378     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
          // The bitcast source has more (narrower) elements than its result:
          // scale the element count and the index up by the ratio.
20379     if ((SrcNumElts % DestNumElts) == 0) {
20380       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20381       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20382       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20383                                       NewExtEC);
20384       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20385         SDLoc DL(N);
20386         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20387         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20388                                          V.getOperand(0), NewIndex);
20389         return DAG.getBitcast(NVT, NewExtract);
20390       }
20391     }
          // The bitcast source has fewer (wider) elements than its result: scale
          // down, which requires both the extracted element count and the index
          // to be multiples of the ratio.
20392     if ((DestNumElts % SrcNumElts) == 0) {
20393       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20394       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20395         ElementCount NewExtEC =
20396             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20397         EVT ScalarVT = SrcVT.getScalarType();
20398         if ((ExtIdx % DestSrcRatio) == 0) {
20399           SDLoc DL(N);
20400           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20401           EVT NewExtVT =
20402               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20403           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20404             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20405             SDValue NewExtract =
20406                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20407                             V.getOperand(0), NewIndex);
20408             return DAG.getBitcast(NVT, NewExtract);
20409           }
                // If the rescaled extract covers exactly one source element, a
                // scalar element extract can be used instead.
20410           if (NewExtEC.isScalar() &&
20411               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20412             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20413             SDValue NewExtract =
20414                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20415                             V.getOperand(0), NewIndex);
20416             return DAG.getBitcast(NVT, NewExtract);
20417           }
20418         }
20419       }
20420     }
20421   }
20422
20423   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20424     unsigned ExtNumElts = NVT.getVectorMinNumElements();
20425     EVT ConcatSrcVT = V.getOperand(0).getValueType();
20426     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20427            "Concat and extract subvector do not change element type");
20428     assert((ExtIdx % ExtNumElts) == 0 &&
20429            "Extract index is not a multiple of the input vector length.");
20430
20431     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20432     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20433
20434     // If the concatenated source types match this extract, it's a direct
20435     // simplification:
20436     // extract_subvec (concat V1, V2, ...), i --> Vi
20437     if (ConcatSrcNumElts == ExtNumElts)
20438       return V.getOperand(ConcatOpIdx);
20439
20440     // If the concatenated source vectors are a multiple length of this extract,
20441     // then extract a fraction of one of those source vectors directly from a
20442     // concat operand. Example:
20443     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
20444     //   v2i8 extract_subvec v8i8 Y, 6
20445     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20446       SDLoc DL(N);
20447       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20448       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20449              "Trying to extract from >1 concat operand?");
20450       assert(NewExtIdx % ExtNumElts == 0 &&
20451              "Extract index is not a multiple of the input vector length.");
20452       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20453       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20454                          V.getOperand(ConcatOpIdx), NewIndexC);
20455     }
20456   }
20457
        // The remaining folds look through any bitcasts on the source vector.
20458   V = peekThroughBitcasts(V);
20459
20460   // If the input is a build vector. Try to make a smaller build vector.
20461   if (V.getOpcode() == ISD::BUILD_VECTOR) {
20462     EVT InVT = V.getValueType();
20463     unsigned ExtractSize = NVT.getSizeInBits();
20464     unsigned EltSize = InVT.getScalarSizeInBits();
20465     // Only do this if we won't split any elements.
20466     if (ExtractSize % EltSize == 0) {
20467       unsigned NumElems = ExtractSize / EltSize;
20468       EVT EltVT = InVT.getVectorElementType();
20469       EVT ExtractVT =
20470           NumElems == 1 ? EltVT
20471                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20472       if ((Level < AfterLegalizeDAG ||
20473            (NumElems == 1 ||
20474             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20475           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
              // Convert the extract index (in NVT elements) into an index in
              // build_vector elements; the two may differ because of bitcasts.
20476         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20477
20478         if (NumElems == 1) {
20479           SDValue Src = V->getOperand(IdxVal);
                // The build_vector operand may have a different (wider) type than
                // the vector's element type. Truncate it to the scalar element
                // type; the result type of the truncate must be EltVT, not the
                // vector type InVT, because Src is a scalar.
20480           if (EltVT != Src.getValueType())
20481             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
20482           return DAG.getBitcast(NVT, Src);
20483         }
20484
20485         // Extract the pieces from the original build_vector.
20486         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20487                                               V->ops().slice(IdxVal, NumElems));
20488         return DAG.getBitcast(NVT, BuildVec);
20489       }
20490     }
20491   }
20492
20493   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20494     // Handle only simple case where vector being inserted and vector
20495     // being extracted are of same size.
          // NOTE: a size mismatch returns immediately, so the folds that follow
          // this block are not attempted for such nodes.
20496     EVT SmallVT = V.getOperand(1).getValueType();
20497     if (!NVT.bitsEq(SmallVT))
20498       return SDValue();
20499
20500     // Combine:
20501     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20502     // Into:
20503     //    indices are equal or bit offsets are equal => V1
20504     //    otherwise => (extract_subvec V1, ExtIdx)
20505     uint64_t InsIdx = V.getConstantOperandVal(2);
20506     if (InsIdx * SmallVT.getScalarSizeInBits() ==
20507         ExtIdx * NVT.getScalarSizeInBits())
20508       return DAG.getBitcast(NVT, V.getOperand(1));
20509     return DAG.getNode(
20510         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20511         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20512         N->getOperand(1));
20513   }
20514
20515   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20516     return NarrowBOp;
20517
        // Returning N itself reports that the node was updated in place.
20518   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20519     return SDValue(N, 0);
20520
20521   return SDValue();
20522 }
20523
20524 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20525 /// followed by concatenation. Narrow vector ops may have better performance
20526 /// than wide ops, and this can unlock further narrowing of other vector ops.
20527 /// Targets can invert this transform later if it is not profitable.
      ///
      /// Only applies when both shuffle operands are 2-operand CONCAT_VECTORS whose
      /// second operand is undef, and the target reports both half-width masks as
      /// legal.
      ///
      /// \returns the concat of the two narrow shuffles, or an empty SDValue.
20528 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20529                                          SelectionDAG &DAG) {
20530   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20531   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20532       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20533       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20534     return SDValue();
20535
20536   // Split the wide shuffle mask into halves. Any mask element that is accessing
20537   // operand 1 is offset down to account for narrowing of the vectors.
20538   ArrayRef<int> Mask = Shuf->getMask();
20539   EVT VT = Shuf->getValueType(0);
20540   unsigned NumElts = VT.getVectorNumElements();
20541   unsigned HalfNumElts = NumElts / 2;
        // Both half masks start out as all-undef (-1).
20542   SmallVector<int, 16> Mask0(HalfNumElts, -1);
20543   SmallVector<int, 16> Mask1(HalfNumElts, -1);
20544   for (unsigned i = 0; i != NumElts; ++i) {
20545     if (Mask[i] == -1)
20546       continue;
20547     // If we reference the upper (undef) subvector then the element is undef.
20548     if ((Mask[i] % NumElts) >= HalfNumElts)
20549       continue;
          // Indices into wide operand 1 start at NumElts; in the narrow shuffle
          // operand 1 starts at HalfNumElts, so shift those down by HalfNumElts.
20550     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20551     if (i < HalfNumElts)
20552       Mask0[i] = M;
20553     else
20554       Mask1[i - HalfNumElts] = M;
20555   }
20556
20557   // Ask the target if this is a valid transform.
20558   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20559   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20560                                 HalfNumElts);
20561   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20562       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20563     return SDValue();
20564
20565   // shuffle (concat X, undef), (concat Y, undef), Mask -->
20566   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20567   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20568   SDLoc DL(Shuf);
20569   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20570   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20571   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20572 }
20573
20574 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20575 // or turn a shuffle of a single concat into simpler shuffle then concat.
      //
      // N is cast to a ShuffleVectorSDNode. Operand 0's own operand 0 is read
      // unconditionally to obtain the concat piece type, so operand 0 is presumably
      // guaranteed by the caller to be a CONCAT_VECTORS -- confirm at call sites.
      // Returns the new CONCAT_VECTORS node, or an empty SDValue if the mask is not
      // made of whole, in-order copies of concat pieces.
20576 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20577   EVT VT = N->getValueType(0);
20578   unsigned NumElts = VT.getVectorNumElements();
20579
20580   SDValue N0 = N->getOperand(0);
20581   SDValue N1 = N->getOperand(1);
20582   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20583   ArrayRef<int> Mask = SVN->getMask();
20584
20585   SmallVector<SDValue, 4> Ops;
20586   EVT ConcatVT = N0.getOperand(0).getValueType();
20587   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20588   unsigned NumConcats = NumElts / NumElemsPerConcat;
20589
20590   auto IsUndefMaskElt = [](int i) { return i == -1; };
20591
20592   // Special case: shuffle(concat(A,B)) can be more efficiently represented
20593   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20594   // half vector elements.
20595   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20596       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20597                    IsUndefMaskElt)) {
20598     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20599                               N0.getOperand(1),
20600                               Mask.slice(0, NumElemsPerConcat));
20601     N1 = DAG.getUNDEF(ConcatVT);
20602     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20603   }
20604
20605   // Look at every vector that's inserted. We're looking for exact
20606   // subvector-sized copies from a concatenated vector
20607   for (unsigned I = 0; I != NumConcats; ++I) {
20608     unsigned Begin = I * NumElemsPerConcat;
20609     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20610
20611     // Make sure we're dealing with a copy.
          // A fully undef chunk of the mask simply becomes an undef piece.
20612     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20613       Ops.push_back(DAG.getUNDEF(ConcatVT));
20614       continue;
20615     }
20616
          // Every defined element of this chunk must read lane i of one and the
          // same concat piece; OpIdx records which piece that is.
20617     int OpIdx = -1;
20618     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20619       if (IsUndefMaskElt(SubMask[i]))
20620         continue;
20621       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20622         return SDValue();
20623       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20624       if (0 <= OpIdx && EltOpIdx != OpIdx)
20625         return SDValue();
20626       OpIdx = EltOpIdx;
20627     }
20628     assert(0 <= OpIdx && "Unknown concat_vectors op");
20629
          // Piece indices beyond N0's operand count refer to operands of N1.
20630     if (OpIdx < (int)N0.getNumOperands())
20631       Ops.push_back(N0.getOperand(OpIdx));
20632     else
20633       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20634   }
20635
20636   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20637 }
20638
20639 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20640 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20641 //
20642 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20643 // a simplification in some sense, but it isn't appropriate in general: some
20644 // BUILD_VECTORs are substantially cheaper than others. The general case
20645 // of a BUILD_VECTOR requires inserting each element individually (or
20646 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20647 // all constants is a single constant pool load.  A BUILD_VECTOR where each
20648 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
20649 // are undef lowers to a small number of element insertions.
20650 //
20651 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20652 // We don't fold shuffles where one side is a non-zero constant, and we don't
20653 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20654 // non-constant operands. This seems to work out reasonably well in practice.
      //
      // TLI is consulted only to choose between zero- and sign-extension when the
      // collected scalars have to be widened to a common type. Returns the new
      // BUILD_VECTOR, or an empty SDValue if the shuffle is left alone.
20655 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20656                                        SelectionDAG &DAG,
20657                                        const TargetLowering &TLI) {
20658   EVT VT = SVN->getValueType(0);
20659   unsigned NumElts = VT.getVectorNumElements();
20660   SDValue N0 = SVN->getOperand(0);
20661   SDValue N1 = SVN->getOperand(1);
20662
20663   if (!N0->hasOneUse())
20664     return SDValue();
20665
20666   // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
20667   // discussed above.
20668   if (!N1.isUndef()) {
20669     if (!N1->hasOneUse())
20670       return SDValue();
20671
20672     bool N0AnyConst = isAnyConstantBuildVector(N0);
20673     bool N1AnyConst = isAnyConstantBuildVector(N1);
20674     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20675       return SDValue();
20676     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20677       return SDValue();
20678   }
20679
20680   // If both inputs are splats of the same value then we can safely merge this
20681   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20682   bool IsSplat = false;
20683   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20684   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20685   if (BV0 && BV1)
20686     if (SDValue Splat0 = BV0->getSplatValue())
20687       IsSplat = (Splat0 == BV1->getSplatValue());
20688
        // Collect one scalar per mask element. DuplicateOps tracks the non-constant
        // scalars already used so that reuse can be detected below.
20689   SmallVector<SDValue, 8> Ops;
20690   SmallSet<SDValue, 16> DuplicateOps;
20691   for (int M : SVN->getMask()) {
          // Negative (undef) mask elements keep this undef scalar.
20692     SDValue Op = DAG.getUNDEF(VT.getScalarType());
20693     if (M >= 0) {
            // Mask values below NumElts select from N0, the rest from N1.
20694       int Idx = M < (int)NumElts ? M : M - NumElts;
20695       SDValue &S = (M < (int)NumElts ? N0 : N1);
20696       if (S.getOpcode() == ISD::BUILD_VECTOR) {
20697         Op = S.getOperand(Idx);
20698       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
              // Only lane 0 of a SCALAR_TO_VECTOR is taken from its operand; any
              // other lane is treated as undef.
20699         SDValue Op0 = S.getOperand(0);
20700         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20701       } else {
20702         // Operand can't be combined - bail out.
20703         return SDValue();
20704       }
20705     }
20706
20707     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20708     // generating a splat; semantically, this is fine, but it's likely to
20709     // generate low-quality code if the target can't reconstruct an appropriate
20710     // shuffle.
20711     if (!Op.isUndef() && !isIntOrFPConstant(Op))
20712       if (!IsSplat && !DuplicateOps.insert(Op).second)
20713         return SDValue();
20714
20715     Ops.push_back(Op);
20716   }
20717
20718   // BUILD_VECTOR requires all inputs to be of the same type, find the
20719   // maximum type and extend them all.
20720   EVT SVT = VT.getScalarType();
20721   if (SVT.isInteger())
20722     for (SDValue &Op : Ops)
20723       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
        // Widen with a zero-extend when the target reports it as free, otherwise
        // with a sign-extend.
20724   if (SVT != VT.getScalarType())
20725     for (SDValue &Op : Ops)
20726       Op = TLI.isZExtFree(Op.getValueType(), SVT)
20727                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20728                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20729   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20730 }
20731
20732 // Match shuffles that can be converted to any_vector_extend_in_reg.
20733 // This is often generated during legalization.
20734 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20735 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
      //
      // Only integer vectors on little-endian targets are handled. Returns the
      // ANY_EXTEND_VECTOR_INREG of operand 0 bitcast back to the shuffle's type, or
      // an empty SDValue if no power-of-2 scale matches.
20736 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20737                                             SelectionDAG &DAG,
20738                                             const TargetLowering &TLI,
20739                                             bool LegalOperations) {
20740   EVT VT = SVN->getValueType(0);
20741   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20742
20743   // TODO Add support for big-endian when we have a test case.
20744   if (!VT.isInteger() || IsBigEndian)
20745     return SDValue();
20746
20747   unsigned NumElts = VT.getVectorNumElements();
20748   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20749   ArrayRef<int> Mask = SVN->getMask();
20750   SDValue N0 = SVN->getOperand(0);
20751
20752   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
        // Every defined mask element must sit at a multiple of Scale and read
        // source element i / Scale. Such an index is always below NumElts, so only
        // operand 0 is ever referenced by an accepted mask.
20753   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20754     for (unsigned i = 0; i != NumElts; ++i) {
20755       if (Mask[i] < 0)
20756         continue;
20757       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20758         continue;
20759       return false;
20760     }
20761     return true;
20762   };
20763
20764   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
20765   // power-of-2 extensions as they are the most likely.
20766   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20767     // Check for non power of 2 vector sizes
20768     if (NumElts % Scale != 0)
20769       continue;
20770     if (!isAnyExtend(Scale))
20771       continue;
20772
          // The extended type has Scale-times wider elements and Scale-times fewer
          // of them.
20773     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20774     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20775     // Never create an illegal type. Only create unsupported operations if we
20776     // are pre-legalization.
20777     if (TLI.isTypeLegal(OutVT))
20778       if (!LegalOperations ||
20779           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20780         return DAG.getBitcast(VT,
20781                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20782                                           SDLoc(SVN), OutVT, N0));
20783   }
20784
20785   return SDValue();
20786 }
20787
20788 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20789 // each source element of a large type into the lowest elements of a smaller
20790 // destination type. This is often generated during legalization.
20791 // If the source node itself was a '*_extend_vector_inreg' node then we should
20792 // then be able to remove it.
      //
      // Only integer vectors on little-endian targets are handled. Returns the
      // extend node's own source bitcast to the shuffle's type, or an empty SDValue
      // if the pattern does not match.
20793 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20794                                         SelectionDAG &DAG) {
20795   EVT VT = SVN->getValueType(0);
20796   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20797
20798   // TODO Add support for big-endian when we have a test case.
20799   if (!VT.isInteger() || IsBigEndian)
20800     return SDValue();
20801
        // Operand 0 (looking through bitcasts) must be one of the
        // *_EXTEND_VECTOR_INREG nodes.
20802   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20803
20804   unsigned Opcode = N0.getOpcode();
20805   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20806       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20807       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20808     return SDValue();
20809
20810   SDValue N00 = N0.getOperand(0);
20811   ArrayRef<int> Mask = SVN->getMask();
20812   unsigned NumElts = VT.getVectorNumElements();
20813   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20814   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20815   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20816
20817   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20818     return SDValue();
20819   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20820
20821   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20822   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20823   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
        // Every defined mask element i must read element i * Scale, and that index
        // must stay below NumElts.
20824   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20825     for (unsigned i = 0; i != NumElts; ++i) {
20826       if (Mask[i] < 0)
20827         continue;
20828       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20829         continue;
20830       return false;
20831     }
20832     return true;
20833   };
20834
20835   // At the moment we just handle the case where we've truncated back to the
20836   // same size as before the extension.
20837   // TODO: handle more extension/truncation cases as cases arise.
20838   if (EltSizeInBits != ExtSrcSizeInBits)
20839     return SDValue();
20840
20841   // We can remove *extend_vector_inreg only if the truncation happens at
20842   // the same scale as the extension.
20843   if (isTruncate(ExtScale))
20844     return DAG.getBitcast(VT, N00);
20845
20846   return SDValue();
20847 }
20848
20849 // Combine shuffles of splat-shuffles of the form:
20850 // shuffle (shuffle V, undef, splat-mask), undef, M
20851 // If splat-mask contains undef elements, we need to be careful about
20852 // introducing undef's in the folded mask which are not the result of composing
20853 // the masks of the shuffles.
20854 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20855                                         SelectionDAG &DAG) {
20856   if (!Shuf->getOperand(1).isUndef())
20857     return SDValue();
20858   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20859   if (!Splat || !Splat->isSplat())
20860     return SDValue();
20861
20862   ArrayRef<int> ShufMask = Shuf->getMask();
20863   ArrayRef<int> SplatMask = Splat->getMask();
20864   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
20865
20866   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20867   // every undef mask element in the splat-shuffle has a corresponding undef
20868   // element in the user-shuffle's mask or if the composition of mask elements
20869   // would result in undef.
20870   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20871   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
20872   //   In this case it is not legal to simplify to the splat-shuffle because we
20873   //   may be exposing the users of the shuffle an undef element at index 1
20874   //   which was not there before the combine.
20875   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20876   //   In this case the composition of masks yields SplatMask, so it's ok to
20877   //   simplify to the splat-shuffle.
20878   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20879   //   In this case the composed mask includes all undef elements of SplatMask
20880   //   and in addition sets element zero to undef. It is safe to simplify to
20881   //   the splat-shuffle.
20882   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20883                                        ArrayRef<int> SplatMask) {
20884     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20885       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20886           SplatMask[UserMask[i]] != -1)
20887         return false;
20888     return true;
20889   };
20890   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20891     return Shuf->getOperand(0);
20892
20893   // Create a new shuffle with a mask that is composed of the two shuffles'
20894   // masks.
20895   SmallVector<int, 32> NewMask;
20896   for (int Idx : ShufMask)
20897     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20898
20899   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20900                               Splat->getOperand(0), Splat->getOperand(1),
20901                               NewMask);
20902 }
20903
20904 /// Combine shuffle of shuffle of the form:
20905 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20906 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20907                                      SelectionDAG &DAG) {
20908   if (!OuterShuf->getOperand(1).isUndef())
20909     return SDValue();
20910   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20911   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20912     return SDValue();
20913
20914   ArrayRef<int> OuterMask = OuterShuf->getMask();
20915   ArrayRef<int> InnerMask = InnerShuf->getMask();
20916   unsigned NumElts = OuterMask.size();
20917   assert(NumElts == InnerMask.size() && "Mask length mismatch");
20918   SmallVector<int, 32> CombinedMask(NumElts, -1);
20919   int SplatIndex = -1;
20920   for (unsigned i = 0; i != NumElts; ++i) {
20921     // Undef lanes remain undef.
20922     int OuterMaskElt = OuterMask[i];
20923     if (OuterMaskElt == -1)
20924       continue;
20925
20926     // Peek through the shuffle masks to get the underlying source element.
20927     int InnerMaskElt = InnerMask[OuterMaskElt];
20928     if (InnerMaskElt == -1)
20929       continue;
20930
20931     // Initialize the splatted element.
20932     if (SplatIndex == -1)
20933       SplatIndex = InnerMaskElt;
20934
20935     // Non-matching index - this is not a splat.
20936     if (SplatIndex != InnerMaskElt)
20937       return SDValue();
20938
20939     CombinedMask[i] = InnerMaskElt;
20940   }
20941   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
20942           getSplatIndex(CombinedMask) != -1) &&
20943          "Expected a splat mask");
20944
20945   // TODO: The transform may be a win even if the mask is not legal.
20946   EVT VT = OuterShuf->getValueType(0);
20947   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
20948   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20949     return SDValue();
20950
20951   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20952                               InnerShuf->getOperand(1), CombinedMask);
20953 }
20954
20955 /// If the shuffle mask is taking exactly one element from the first vector
20956 /// operand and passing through all other elements from the second vector
20957 /// operand, return the index of the mask element that is choosing an element
20958 /// from the first operand. Otherwise, return -1.
20959 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20960   int MaskSize = Mask.size();
20961   int EltFromOp0 = -1;
20962   // TODO: This does not match if there are undef elements in the shuffle mask.
20963   // Should we ignore undefs in the shuffle mask instead? The trade-off is
20964   // removing an instruction (a shuffle), but losing the knowledge that some
20965   // vector lanes are not needed.
20966   for (int i = 0; i != MaskSize; ++i) {
20967     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20968       // We're looking for a shuffle of exactly one element from operand 0.
20969       if (EltFromOp0 != -1)
20970         return -1;
20971       EltFromOp0 = i;
20972     } else if (Mask[i] != i + MaskSize) {
20973       // Nothing from operand 1 can change lanes.
20974       return -1;
20975     }
20976   }
20977   return EltFromOp0;
20978 }
20979
20980 /// If a shuffle inserts exactly one element from a source vector operand into
20981 /// another vector operand and we can access the specified element as a scalar,
20982 /// then we can eliminate the shuffle.
20983 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
20984                                       SelectionDAG &DAG) {
20985   // First, check if we are taking one element of a vector and shuffling that
20986   // element into another vector.
20987   ArrayRef<int> Mask = Shuf->getMask();
20988   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
20989   SDValue Op0 = Shuf->getOperand(0);
20990   SDValue Op1 = Shuf->getOperand(1);
20991   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
20992   if (ShufOp0Index == -1) {
20993     // Commute mask and check again.
20994     ShuffleVectorSDNode::commuteMask(CommutedMask);
20995     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
20996     if (ShufOp0Index == -1)
20997       return SDValue();
20998     // Commute operands to match the commuted shuffle mask.
20999     std::swap(Op0, Op1);
21000     Mask = CommutedMask;
21001   }
21002
21003   // The shuffle inserts exactly one element from operand 0 into operand 1.
21004   // Now see if we can access that element as a scalar via a real insert element
21005   // instruction.
21006   // TODO: We can try harder to locate the element as a scalar. Examples: it
21007   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21008   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21009          "Shuffle mask value must be from operand 0");
21010   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21011     return SDValue();
21012
21013   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21014   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21015     return SDValue();
21016
21017   // There's an existing insertelement with constant insertion index, so we
21018   // don't need to check the legality/profitability of a replacement operation
21019   // that differs at most in the constant value. The target should be able to
21020   // lower any of those in a similar way. If not, legalization will expand this
21021   // to a scalar-to-vector plus shuffle.
21022   //
21023   // Note that the shuffle may move the scalar from the position that the insert
21024   // element used. Therefore, our new insert element occurs at the shuffle's
21025   // mask index value, not the insert's index value.
21026   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21027   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21028   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21029                      Op1, Op0.getOperand(1), NewInsIndex);
21030 }
21031
21032 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21033 /// completely. This has the potential to lose undef knowledge because the first
21034 /// shuffle may not have an undef mask element where the second one does. So
21035 /// only call this after doing simplifications based on demanded elements.
21036 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21037   // shuf (shuf0 X, Y, Mask0), undef, Mask
21038   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21039   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21040     return SDValue();
21041
21042   ArrayRef<int> Mask = Shuf->getMask();
21043   ArrayRef<int> Mask0 = Shuf0->getMask();
21044   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21045     // Ignore undef elements.
21046     if (Mask[i] == -1)
21047       continue;
21048     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21049
21050     // Is the element of the shuffle operand chosen by this shuffle the same as
21051     // the element chosen by the shuffle operand itself?
21052     if (Mask0[Mask[i]] != Mask0[i])
21053       return SDValue();
21054   }
21055   // Every element of this shuffle is identical to the result of the previous
21056   // shuffle, so we can replace this value.
21057   return Shuf->getOperand(0);
21058 }
21059
21060 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // Combine entry point for a VECTOR_SHUFFLE node N. The folds below are
        // attempted in order and the first one that produces a value wins. An
        // empty SDValue is returned when none of them applies.
21061   EVT VT = N->getValueType(0);
21062   unsigned NumElts = VT.getVectorNumElements();
21063
21064   SDValue N0 = N->getOperand(0);
21065   SDValue N1 = N->getOperand(1);
21066
21067   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21068
21069   // Canonicalize shuffle undef, undef -> undef
21070   if (N0.isUndef() && N1.isUndef())
21071     return DAG.getUNDEF(VT);
21072
21073   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21074
21075   // Canonicalize shuffle v, v -> v, undef
21076   if (N0 == N1) {
21077     SmallVector<int, 8> NewMask;
21078     for (unsigned i = 0; i != NumElts; ++i) {
21079       int Idx = SVN->getMaskElt(i);
            // Both operands are the same value, so an index into the second
            // operand is rebased onto the same lane of the first operand.
21080       if (Idx >= (int)NumElts) Idx -= NumElts;
21081       NewMask.push_back(Idx);
21082     }
21083     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21084   }
21085
21086   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
21087   if (N0.isUndef())
21088     return DAG.getCommutedVectorShuffle(*SVN);
21089
21090   // Remove references to rhs if it is undef
21091   if (N1.isUndef()) {
21092     bool Changed = false;
21093     SmallVector<int, 8> NewMask;
21094     for (unsigned i = 0; i != NumElts; ++i) {
21095       int Idx = SVN->getMaskElt(i);
            // An index >= NumElts selects from the (undef) second operand.
21096       if (Idx >= (int)NumElts) {
21097         Idx = -1;
21098         Changed = true;
21099       }
21100       NewMask.push_back(Idx);
21101     }
          // Only build a new node if at least one mask element was rewritten.
21102     if (Changed)
21103       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21104   }
21105
21106   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21107     return InsElt;
21108
21109   // A shuffle of a single vector that is a splatted value can always be folded.
21110   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21111     return V;
21112
21113   if (SDValue V = formSplatFromShuffles(SVN, DAG))
21114     return V;
21115
21116   // If it is a splat, check if the argument vector is another splat or a
21117   // build_vector.
        // Only splats whose source index falls within the first operand (N0) are
        // handled here.
21118   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21119     int SplatIndex = SVN->getSplatIndex();
21120     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21121         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21122       // splat (vector_bo L, R), Index -->
21123       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21124       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21125       SDLoc DL(N);
21126       EVT EltVT = VT.getScalarType();
21127       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21128       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21129       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
            // The scalar binop reuses the node flags of the vector binop.
21130       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21131                                   N0.getNode()->getFlags());
            // The scalar result lands in lane 0 and is then splatted with an
            // all-zero mask.
21132       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21133       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21134       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21135     }
21136
21137     // If this is a bit convert that changes the element type of the vector but
21138     // not the number of vector elements, look through it.  Be careful not to
21139     // look through conversions that change things like v4f32 to v2f64.
21140     SDNode *V = N0.getNode();
21141     if (V->getOpcode() == ISD::BITCAST) {
21142       SDValue ConvInput = V->getOperand(0);
21143       if (ConvInput.getValueType().isVector() &&
21144           ConvInput.getValueType().getVectorNumElements() == NumElts)
21145         V = ConvInput.getNode();
21146     }
21147
21148     if (V->getOpcode() == ISD::BUILD_VECTOR) {
21149       assert(V->getNumOperands() == NumElts &&
21150              "BUILD_VECTOR has wrong number of operands");
21151       SDValue Base;
21152       bool AllSame = true;
            // Base becomes the first non-undef operand, if there is one.
21153       for (unsigned i = 0; i != NumElts; ++i) {
21154         if (!V->getOperand(i).isUndef()) {
21155           Base = V->getOperand(i);
21156           break;
21157         }
21158       }
21159       // Splat of <u, u, u, u>, return <u, u, u, u>
21160       if (!Base.getNode())
21161         return N0;
            // AllSame holds only if every operand, undef ones included, equals
            // Base.
21162       for (unsigned i = 0; i != NumElts; ++i) {
21163         if (V->getOperand(i) != Base) {
21164           AllSame = false;
21165           break;
21166         }
21167       }
21168       // Splat of <x, x, x, x>, return <x, x, x, x>
21169       if (AllSame)
21170         return N0;
21171
21172       // Canonicalize any other splat as a build_vector.
21173       SDValue Splatted = V->getOperand(SplatIndex);
21174       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21175       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21176
21177       // We may have jumped through bitcasts, so the type of the
21178       // BUILD_VECTOR may not match the type of the shuffle.
21179       if (V->getValueType(0) != VT)
21180         NewBV = DAG.getBitcast(VT, NewBV);
21181       return NewBV;
21182     }
21183   }
21184
21185   // Simplify source operands based on shuffle mask.
        // NOTE(review): returning N itself (rather than a new value) presumably
        // tells the caller the node was updated in place - confirm against the
        // combiner's driver loop.
21186   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21187     return SDValue(N, 0);
21188
21189   // This is intentionally placed after demanded elements simplification because
21190   // it could eliminate knowledge of undef elements created by this shuffle.
21191   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21192     return ShufOp;
21193
21194   // Match shuffles that can be converted to any_vector_extend_in_reg.
21195   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21196     return V;
21197
21198   // Combine "truncate_vector_in_reg" style shuffles.
21199   if (SDValue V = combineTruncationShuffle(SVN, DAG))
21200     return V;
21201
        // Shuffle of concat(s): only attempted before vector-op legalization, and
        // only when N1 is undef or is a concat whose first operand has the same
        // type as N0's first operand.
21202   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21203       Level < AfterLegalizeVectorOps &&
21204       (N1.isUndef() ||
21205       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21206        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21207     if (SDValue V = partitionShuffleOfConcats(N, DAG))
21208       return V;
21209   }
21210
21211   // A shuffle of a concat of the same narrow vector can be reduced to use
21212   // only low-half elements of a concat with undef:
21213   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21214   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21215       N0.getNumOperands() == 2 &&
21216       N0.getOperand(0) == N0.getOperand(1)) {
21217     int HalfNumElts = (int)NumElts / 2;
21218     SmallVector<int, 8> NewMask;
21219     for (unsigned i = 0; i != NumElts; ++i) {
21220       int Idx = SVN->getMaskElt(i);
            // Both halves of the concat hold X, so a high-half index is rebased
            // onto the matching low-half lane.
21221       if (Idx >= HalfNumElts) {
21222         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21223         Idx -= HalfNumElts;
21224       }
21225       NewMask.push_back(Idx);
21226     }
          // Only rewrite if the target accepts the rebased mask.
21227     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21228       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21229       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21230                                    N0.getOperand(0), UndefVec);
21231       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21232     }
21233   }
21234
21235   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21236   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21237   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21238     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21239       return Res;
21240
21241   // If this shuffle only has a single input that is a bitcasted shuffle,
21242   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21243   // back to their original types.
21244   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21245       N1.isUndef() && Level < AfterLegalizeVectorOps &&
21246       TLI.isTypeLegal(VT)) {
21247
21248     SDValue BC0 = peekThroughOneUseBitcasts(N0);
21249     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21250       EVT SVT = VT.getScalarType();
21251       EVT InnerVT = BC0->getValueType(0);
21252       EVT InnerSVT = InnerVT.getScalarType();
21253
21254       // Determine which shuffle works with the smaller scalar type.
21255       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21256       EVT ScaleSVT = ScaleVT.getScalarType();
21257
            // Both scalar sizes must be whole multiples of the narrower one so
            // that each mask can be rescaled by an integer factor.
21258       if (TLI.isTypeLegal(ScaleVT) &&
21259           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21260           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21261         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21262         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21263
21264         // Scale the shuffle masks to the smaller scalar type.
21265         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21266         SmallVector<int, 8> InnerMask;
21267         SmallVector<int, 8> OuterMask;
21268         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21269         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21270
21271         // Merge the shuffle masks.
21272         SmallVector<int, 8> NewMask;
21273         for (int M : OuterMask)
21274           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21275
21276         // Test for shuffle mask legality over both commutations.
21277         SDValue SV0 = BC0->getOperand(0);
21278         SDValue SV1 = BC0->getOperand(1);
21279         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21280         if (!LegalMask) {
21281           std::swap(SV0, SV1);
21282           ShuffleVectorSDNode::commuteMask(NewMask);
21283           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21284         }
21285
21286         if (LegalMask) {
21287           SV0 = DAG.getBitcast(ScaleVT, SV0);
21288           SV1 = DAG.getBitcast(ScaleVT, SV1);
21289           return DAG.getBitcast(
21290               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21291         }
21292       }
21293     }
21294   }
21295
21296   // Compute the combined shuffle mask for a shuffle with SV0 as the first
21297   // operand, and SV1 as the second operand.
21298   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21299   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
        // Returns true on success, with the merged operands in SV0/SV1 and the
        // merged mask in Mask. SV0 and/or SV1 may be left empty when no mask
        // element ends up referring to them; callers substitute undef for those.
21300   auto MergeInnerShuffle =
21301       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21302                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
21303                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21304                      SmallVectorImpl<int> &Mask) -> bool {
21305     // Don't try to fold splats; they're likely to simplify somehow, or they
21306     // might be free.
21307     if (OtherSVN->isSplat())
21308       return false;
21309
          // Reset the outputs so a previous (failed) attempt leaves nothing behind.
21310     SV0 = SV1 = SDValue();
21311     Mask.clear();
21312
21313     for (unsigned i = 0; i != NumElts; ++i) {
21314       int Idx = SVN->getMaskElt(i);
21315       if (Idx < 0) {
21316         // Propagate Undef.
21317         Mask.push_back(Idx);
21318         continue;
21319       }
21320
            // When commuted, the inner shuffle is the second operand of SVN; flip
            // the index so that values below NumElts always mean "inner shuffle".
21321       if (Commute)
21322         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21323
21324       SDValue CurrentVec;
21325       if (Idx < (int)NumElts) {
21326         // This shuffle index refers to the inner shuffle N0. Lookup the inner
21327         // shuffle mask to identify which vector is actually referenced.
21328         Idx = OtherSVN->getMaskElt(Idx);
21329         if (Idx < 0) {
21330           // Propagate Undef.
21331           Mask.push_back(Idx);
21332           continue;
21333         }
21334         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21335                                           : OtherSVN->getOperand(1);
21336       } else {
21337         // This shuffle index references an element within N1.
21338         CurrentVec = N1;
21339       }
21340
21341       // Simple case where 'CurrentVec' is UNDEF.
21342       if (CurrentVec.isUndef()) {
21343         Mask.push_back(-1);
21344         continue;
21345       }
21346
21347       // Canonicalize the shuffle index. We don't know yet if CurrentVec
21348       // will be the first or second operand of the combined shuffle.
21349       Idx = Idx % NumElts;
21350       if (!SV0.getNode() || SV0 == CurrentVec) {
21351         // Ok. CurrentVec is the left hand side.
21352         // Update the mask accordingly.
21353         SV0 = CurrentVec;
21354         Mask.push_back(Idx);
21355         continue;
21356       }
21357       if (!SV1.getNode() || SV1 == CurrentVec) {
21358         // Ok. CurrentVec is the right hand side.
21359         // Update the mask accordingly.
21360         SV1 = CurrentVec;
21361         Mask.push_back(Idx + NumElts);
21362         continue;
21363       }
21364
21365       // Last chance - see if the vector is another shuffle and if it
21366       // uses one of the existing candidate shuffle ops.
21367       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21368         int InnerIdx = CurrentSVN->getMaskElt(Idx);
21369         if (InnerIdx < 0) {
21370           Mask.push_back(-1);
21371           continue;
21372         }
21373         SDValue InnerVec = (InnerIdx < (int)NumElts)
21374                                ? CurrentSVN->getOperand(0)
21375                                : CurrentSVN->getOperand(1);
21376         if (InnerVec.isUndef()) {
21377           Mask.push_back(-1);
21378           continue;
21379         }
21380         InnerIdx %= NumElts;
21381         if (InnerVec == SV0) {
21382           Mask.push_back(InnerIdx);
21383           continue;
21384         }
21385         if (InnerVec == SV1) {
21386           Mask.push_back(InnerIdx + NumElts);
21387           continue;
21388         }
21389       }
21390
21391       // Bail out if we cannot convert the shuffle pair into a single shuffle.
21392       return false;
21393     }
21394
          // An all-undef mask is accepted without asking the target about legality.
21395     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21396       return true;
21397
21398     // Avoid introducing shuffles with illegal mask.
21399     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21400     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21401     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21402     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21403     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21404     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21405     if (TLI.isShuffleMaskLegal(Mask, VT))
21406       return true;
21407
          // Second attempt: swap the operands, commute the mask and re-check.
21408     std::swap(SV0, SV1);
21409     ShuffleVectorSDNode::commuteMask(Mask);
21410     return TLI.isShuffleMaskLegal(Mask, VT);
21411   };
21412
21413   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21414     // Canonicalize shuffles according to rules:
21415     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21416     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21417     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21418     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21419         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21420       // The incoming shuffle must be of the same type as the result of the
21421       // current shuffle.
21422       assert(N1->getOperand(0).getValueType() == VT &&
21423              "Shuffle types don't match");
21424
21425       SDValue SV0 = N1->getOperand(0);
21426       SDValue SV1 = N1->getOperand(1);
21427       bool HasSameOp0 = N0 == SV0;
21428       bool IsSV1Undef = SV1.isUndef();
21429       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21430         // Commute the operands of this shuffle so merging below will trigger.
21431         return DAG.getCommutedVectorShuffle(*SVN);
21432     }
21433
21434     // Canonicalize splat shuffles to the RHS to improve merging below.
21435     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21436     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21437         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21438         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21439         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21440       return DAG.getCommutedVectorShuffle(*SVN);
21441     }
21442
21443     // Try to fold according to rules:
21444     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21445     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21446     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21447     // Don't try to fold shuffles with illegal type.
21448     // Only fold if this shuffle is the only user of the other shuffle.
21449     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
          // i == 0 treats operand 0 as the inner shuffle; i == 1 treats operand 1
          // as the inner shuffle (the commuted form).
21450     for (int i = 0; i != 2; ++i) {
21451       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21452           N->isOnlyUserOf(N->getOperand(i).getNode())) {
21453         // The incoming shuffle must be of the same type as the result of the
21454         // current shuffle.
21455         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21456         assert(OtherSV->getOperand(0).getValueType() == VT &&
21457                "Shuffle types don't match");
21458
21459         SDValue SV0, SV1;
21460         SmallVector<int, 4> Mask;
21461         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21462                               SV0, SV1, Mask)) {
21463           // Check if all indices in Mask are Undef. If so, propagate Undef.
21464           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21465             return DAG.getUNDEF(VT);
21466
                // An operand left empty by the merge is replaced with undef.
21467           return DAG.getVectorShuffle(VT, SDLoc(N),
21468                                       SV0 ? SV0 : DAG.getUNDEF(VT),
21469                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21470         }
21471       }
21472     }
21473
21474     // Merge shuffles through binops if we are able to merge it with at least
21475     // one other shuffle.
21476     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21477     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21478     unsigned SrcOpcode = N0.getOpcode();
21479     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21480         (N1.isUndef() ||
21481          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21482       // Get binop source ops, or just pass on the undef.
21483       SDValue Op00 = N0.getOperand(0);
21484       SDValue Op01 = N0.getOperand(1);
21485       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21486       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21487       // TODO: We might be able to relax the VT check but we don't currently
21488       // have any isBinOp() that has different result/ops VTs so play safe until
21489       // we have test coverage.
21490       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21491           Op01.getValueType() == VT && Op11.getValueType() == VT &&
21492           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21493            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21494            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21495            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
              // LeftOp picks which binop operand position is examined (first
              // operands Op00/Op10 vs. second operands Op01/Op11). Commute picks
              // whether the inner shuffle is taken from N1's binop instead of
              // N0's.
21496         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21497                                         SmallVectorImpl<int> &Mask, bool LeftOp,
21498                                         bool Commute) {
21499           SDValue InnerN = Commute ? N1 : N0;
21500           SDValue Op0 = LeftOp ? Op00 : Op01;
21501           SDValue Op1 = LeftOp ? Op10 : Op11;
21502           if (Commute)
21503             std::swap(Op0, Op1);
21504           // Only accept the merged shuffle if we don't introduce undef elements,
21505           // or the inner shuffle already contained undef elements.
21506           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21507           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21508                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21509                                    Mask) &&
21510                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21511                   llvm::none_of(Mask, [](int M) { return M < 0; }));
21512         };
21513
21514         // Ensure we don't increase the number of shuffles - we must merge a
21515         // shuffle from at least one of the LHS and RHS ops.
21516         bool MergedLeft = false;
21517         SDValue LeftSV0, LeftSV1;
21518         SmallVector<int, 4> LeftMask;
21519         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21520             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21521           MergedLeft = true;
21522         } else {
                // No merge on this side: shuffle the binops' first operands
                // directly with this shuffle's own mask.
21523           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21524           LeftSV0 = Op00, LeftSV1 = Op10;
21525         }
21526
21527         bool MergedRight = false;
21528         SDValue RightSV0, RightSV1;
21529         SmallVector<int, 4> RightMask;
21530         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21531             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21532           MergedRight = true;
21533         } else {
                // No merge on this side: shuffle the binops' second operands
                // directly with this shuffle's own mask.
21534           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21535           RightSV0 = Op01, RightSV1 = Op11;
21536         }
21537
21538         if (MergedLeft || MergedRight) {
21539           SDLoc DL(N);
                // Rebuild as bop(shuffle(...), shuffle(...)); operands left empty
                // by the merge become undef.
21540           SDValue LHS = DAG.getVectorShuffle(
21541               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21542               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21543           SDValue RHS = DAG.getVectorShuffle(
21544               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21545               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21546           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21547         }
21548       }
21549     }
21550   }
21551
21552   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21553     return V;
21554
        // No combine applied.
21555   return SDValue();
21556 }
21557
21558 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21559   SDValue InVal = N->getOperand(0);
21560   EVT VT = N->getValueType(0);
21561
21562   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21563   // with a VECTOR_SHUFFLE and possible truncate.
21564   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21565       VT.isFixedLengthVector() &&
21566       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21567     SDValue InVec = InVal->getOperand(0);
21568     SDValue EltNo = InVal->getOperand(1);
21569     auto InVecT = InVec.getValueType();
21570     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21571       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21572       int Elt = C0->getZExtValue();
21573       NewMask[0] = Elt;
21574       // If we have an implict truncate do truncate here as long as it's legal.
21575       // if it's not legal, this should
21576       if (VT.getScalarType() != InVal.getValueType() &&
21577           InVal.getValueType().isScalarInteger() &&
21578           isTypeLegal(VT.getScalarType())) {
21579         SDValue Val =
21580             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21581         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21582       }
21583       if (VT.getScalarType() == InVecT.getScalarType() &&
21584           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21585         SDValue LegalShuffle =
21586           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21587                                       DAG.getUNDEF(InVecT), NewMask, DAG);
21588         if (LegalShuffle) {
21589           // If the initial vector is the correct size this shuffle is a
21590           // valid result.
21591           if (VT == InVecT)
21592             return LegalShuffle;
21593           // If not we must truncate the vector.
21594           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21595             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21596             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21597                                          InVecT.getVectorElementType(),
21598                                          VT.getVectorNumElements());
21599             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21600                                LegalShuffle, ZeroIdx);
21601           }
21602         }
21603       }
21604     }
21605   }
21606
21607   return SDValue();
21608 }
21609
21610 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21611   EVT VT = N->getValueType(0);
21612   SDValue N0 = N->getOperand(0);
21613   SDValue N1 = N->getOperand(1);
21614   SDValue N2 = N->getOperand(2);
21615   uint64_t InsIdx = N->getConstantOperandVal(2);
21616
21617   // If inserting an UNDEF, just return the original vector.
21618   if (N1.isUndef())
21619     return N0;
21620
21621   // If this is an insert of an extracted vector into an undef vector, we can
21622   // just use the input to the extract.
21623   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21624       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21625     return N1.getOperand(0);
21626
21627   // If we are inserting a bitcast value into an undef, with the same
21628   // number of elements, just use the bitcast input of the extract.
21629   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21630   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21631   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21632       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21633       N1.getOperand(0).getOperand(1) == N2 &&
21634       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21635           VT.getVectorElementCount() &&
21636       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21637           VT.getSizeInBits()) {
21638     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21639   }
21640
21641   // If both N1 and N2 are bitcast values on which insert_subvector
21642   // would makes sense, pull the bitcast through.
21643   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21644   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21645   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21646     SDValue CN0 = N0.getOperand(0);
21647     SDValue CN1 = N1.getOperand(0);
21648     EVT CN0VT = CN0.getValueType();
21649     EVT CN1VT = CN1.getValueType();
21650     if (CN0VT.isVector() && CN1VT.isVector() &&
21651         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21652         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21653       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21654                                       CN0.getValueType(), CN0, CN1, N2);
21655       return DAG.getBitcast(VT, NewINSERT);
21656     }
21657   }
21658
21659   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21660   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21661   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21662   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21663       N0.getOperand(1).getValueType() == N1.getValueType() &&
21664       N0.getOperand(2) == N2)
21665     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21666                        N1, N2);
21667
21668   // Eliminate an intermediate insert into an undef vector:
21669   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21670   // insert_subvector undef, X, N2
21671   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21672       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21673     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21674                        N1.getOperand(1), N2);
21675
21676   // Push subvector bitcasts to the output, adjusting the index as we go.
21677   // insert_subvector(bitcast(v), bitcast(s), c1)
21678   // -> bitcast(insert_subvector(v, s, c2))
21679   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21680       N1.getOpcode() == ISD::BITCAST) {
21681     SDValue N0Src = peekThroughBitcasts(N0);
21682     SDValue N1Src = peekThroughBitcasts(N1);
21683     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21684     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21685     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21686         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21687       EVT NewVT;
21688       SDLoc DL(N);
21689       SDValue NewIdx;
21690       LLVMContext &Ctx = *DAG.getContext();
21691       ElementCount NumElts = VT.getVectorElementCount();
21692       unsigned EltSizeInBits = VT.getScalarSizeInBits();
21693       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21694         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21695         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21696         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21697       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21698         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21699         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21700           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21701                                    NumElts.divideCoefficientBy(Scale));
21702           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21703         }
21704       }
21705       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21706         SDValue Res = DAG.getBitcast(NewVT, N0Src);
21707         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21708         return DAG.getBitcast(VT, Res);
21709       }
21710     }
21711   }
21712
21713   // Canonicalize insert_subvector dag nodes.
21714   // Example:
21715   // (insert_subvector (insert_subvector A, Idx0), Idx1)
21716   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
21717   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21718       N1.getValueType() == N0.getOperand(1).getValueType()) {
21719     unsigned OtherIdx = N0.getConstantOperandVal(2);
21720     if (InsIdx < OtherIdx) {
21721       // Swap nodes.
21722       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21723                                   N0.getOperand(0), N1, N2);
21724       AddToWorklist(NewOp.getNode());
21725       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21726                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21727     }
21728   }
21729
21730   // If the input vector is a concatenation, and the insert replaces
21731   // one of the pieces, we can optimize into a single concat_vectors.
21732   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21733       N0.getOperand(0).getValueType() == N1.getValueType() &&
21734       N0.getOperand(0).getValueType().isScalableVector() ==
21735           N1.getValueType().isScalableVector()) {
21736     unsigned Factor = N1.getValueType().getVectorMinNumElements();
21737     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21738     Ops[InsIdx / Factor] = N1;
21739     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21740   }
21741
21742   // Simplify source operands based on insertion.
21743   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21744     return SDValue(N, 0);
21745
21746   return SDValue();
21747 }
21748
21749 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21750   SDValue N0 = N->getOperand(0);
21751
21752   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21753   if (N0->getOpcode() == ISD::FP16_TO_FP)
21754     return N0->getOperand(0);
21755
21756   return SDValue();
21757 }
21758
21759 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21760   SDValue N0 = N->getOperand(0);
21761
21762   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21763   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21764     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21765     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21766       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21767                          N0.getOperand(0));
21768     }
21769   }
21770
21771   return SDValue();
21772 }
21773
21774 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21775   SDValue N0 = N->getOperand(0);
21776   EVT VT = N0.getValueType();
21777   unsigned Opcode = N->getOpcode();
21778
21779   // VECREDUCE over 1-element vector is just an extract.
21780   if (VT.getVectorElementCount().isScalar()) {
21781     SDLoc dl(N);
21782     SDValue Res =
21783         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21784                     DAG.getVectorIdxConstant(0, dl));
21785     if (Res.getValueType() != N->getValueType(0))
21786       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21787     return Res;
21788   }
21789
21790   // On an boolean vector an and/or reduction is the same as a umin/umax
21791   // reduction. Convert them if the latter is legal while the former isn't.
21792   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21793     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21794         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21795     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21796         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21797         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21798       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21799   }
21800
21801   return SDValue();
21802 }
21803
21804 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
21805 /// with the destination vector and a zero vector.
21806 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21807 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
21808 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21809   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
21810
21811   EVT VT = N->getValueType(0);
21812   SDValue LHS = N->getOperand(0);
21813   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21814   SDLoc DL(N);
21815
21816   // Make sure we're not running after operation legalization where it
21817   // may have custom lowered the vector shuffles.
21818   if (LegalOperations)
21819     return SDValue();
21820
21821   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21822     return SDValue();
21823
21824   EVT RVT = RHS.getValueType();
21825   unsigned NumElts = RHS.getNumOperands();
21826
21827   // Attempt to create a valid clear mask, splitting the mask into
21828   // sub elements and checking to see if each is
21829   // all zeros or all ones - suitable for shuffle masking.
21830   auto BuildClearMask = [&](int Split) {
21831     int NumSubElts = NumElts * Split;
21832     int NumSubBits = RVT.getScalarSizeInBits() / Split;
21833
21834     SmallVector<int, 8> Indices;
21835     for (int i = 0; i != NumSubElts; ++i) {
21836       int EltIdx = i / Split;
21837       int SubIdx = i % Split;
21838       SDValue Elt = RHS.getOperand(EltIdx);
21839       // X & undef --> 0 (not undef). So this lane must be converted to choose
21840       // from the zero constant vector (same as if the element had all 0-bits).
21841       if (Elt.isUndef()) {
21842         Indices.push_back(i + NumSubElts);
21843         continue;
21844       }
21845
21846       APInt Bits;
21847       if (isa<ConstantSDNode>(Elt))
21848         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21849       else if (isa<ConstantFPSDNode>(Elt))
21850         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21851       else
21852         return SDValue();
21853
21854       // Extract the sub element from the constant bit mask.
21855       if (DAG.getDataLayout().isBigEndian())
21856         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21857       else
21858         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21859
21860       if (Bits.isAllOnesValue())
21861         Indices.push_back(i);
21862       else if (Bits == 0)
21863         Indices.push_back(i + NumSubElts);
21864       else
21865         return SDValue();
21866     }
21867
21868     // Let's see if the target supports this vector_shuffle.
21869     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21870     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21871     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21872       return SDValue();
21873
21874     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21875     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21876                                                    DAG.getBitcast(ClearVT, LHS),
21877                                                    Zero, Indices));
21878   };
21879
21880   // Determine maximum split level (byte level masking).
21881   int MaxSplit = 1;
21882   if (RVT.getScalarSizeInBits() % 8 == 0)
21883     MaxSplit = RVT.getScalarSizeInBits() / 8;
21884
21885   for (int Split = 1; Split <= MaxSplit; ++Split)
21886     if (RVT.getScalarSizeInBits() % Split == 0)
21887       if (SDValue S = BuildClearMask(Split))
21888         return S;
21889
21890   return SDValue();
21891 }
21892
21893 /// If a vector binop is performed on splat values, it may be profitable to
21894 /// extract, scalarize, and insert/splat.
21895 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21896   SDValue N0 = N->getOperand(0);
21897   SDValue N1 = N->getOperand(1);
21898   unsigned Opcode = N->getOpcode();
21899   EVT VT = N->getValueType(0);
21900   EVT EltVT = VT.getVectorElementType();
21901   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21902
21903   // TODO: Remove/replace the extract cost check? If the elements are available
21904   //       as scalars, then there may be no extract cost. Should we ask if
21905   //       inserting a scalar back into a vector is cheap instead?
21906   int Index0, Index1;
21907   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21908   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21909   if (!Src0 || !Src1 || Index0 != Index1 ||
21910       Src0.getValueType().getVectorElementType() != EltVT ||
21911       Src1.getValueType().getVectorElementType() != EltVT ||
21912       !TLI.isExtractVecEltCheap(VT, Index0) ||
21913       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
21914     return SDValue();
21915
21916   SDLoc DL(N);
21917   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21918   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21919   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21920   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21921
21922   // If all lanes but 1 are undefined, no need to splat the scalar result.
21923   // TODO: Keep track of undefs and use that info in the general case.
21924   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21925       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21926       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21927     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21928     // build_vec ..undef, (bo X, Y), undef...
21929     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21930     Ops[Index0] = ScalarBO;
21931     return DAG.getBuildVector(VT, DL, Ops);
21932   }
21933
21934   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21935   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21936   return DAG.getBuildVector(VT, DL, Ops);
21937 }
21938
21939 /// Visit a binary vector operation, like ADD.
21940 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
21941   assert(N->getValueType(0).isVector() &&
21942          "SimplifyVBinOp only works on vectors!");
21943
21944   SDValue LHS = N->getOperand(0);
21945   SDValue RHS = N->getOperand(1);
21946   SDValue Ops[] = {LHS, RHS};
21947   EVT VT = N->getValueType(0);
21948   unsigned Opcode = N->getOpcode();
21949   SDNodeFlags Flags = N->getFlags();
21950
21951   // See if we can constant fold the vector operation.
21952   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
21953           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
21954     return Fold;
21955
21956   // Move unary shuffles with identical masks after a vector binop:
21957   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
21958   //   --> shuffle (VBinOp A, B), Undef, Mask
21959   // This does not require type legality checks because we are creating the
21960   // same types of operations that are in the original sequence. We do have to
21961   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
21962   // though. This code is adapted from the identical transform in instcombine.
21963   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
21964       Opcode != ISD::UREM && Opcode != ISD::SREM &&
21965       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
21966     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
21967     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
21968     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
21969         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
21970         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
21971       SDLoc DL(N);
21972       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
21973                                      RHS.getOperand(0), Flags);
21974       SDValue UndefV = LHS.getOperand(1);
21975       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
21976     }
21977
21978     // Try to sink a splat shuffle after a binop with a uniform constant.
21979     // This is limited to cases where neither the shuffle nor the constant have
21980     // undefined elements because that could be poison-unsafe or inhibit
21981     // demanded elements analysis. It is further limited to not change a splat
21982     // of an inserted scalar because that may be optimized better by
21983     // load-folding or other target-specific behaviors.
21984     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
21985         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
21986         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21987       // binop (splat X), (splat C) --> splat (binop X, C)
21988       SDLoc DL(N);
21989       SDValue X = Shuf0->getOperand(0);
21990       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
21991       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21992                                   Shuf0->getMask());
21993     }
21994     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
21995         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
21996         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21997       // binop (splat C), (splat X) --> splat (binop C, X)
21998       SDLoc DL(N);
21999       SDValue X = Shuf1->getOperand(0);
22000       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22001       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22002                                   Shuf1->getMask());
22003     }
22004   }
22005
22006   // The following pattern is likely to emerge with vector reduction ops. Moving
22007   // the binary operation ahead of insertion may allow using a narrower vector
22008   // instruction that has better performance than the wide version of the op:
22009   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22010   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22011       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22012       LHS.getOperand(2) == RHS.getOperand(2) &&
22013       (LHS.hasOneUse() || RHS.hasOneUse())) {
22014     SDValue X = LHS.getOperand(1);
22015     SDValue Y = RHS.getOperand(1);
22016     SDValue Z = LHS.getOperand(2);
22017     EVT NarrowVT = X.getValueType();
22018     if (NarrowVT == Y.getValueType() &&
22019         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22020                                               LegalOperations)) {
22021       // (binop undef, undef) may not return undef, so compute that result.
22022       SDLoc DL(N);
22023       SDValue VecC =
22024           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22025       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22026       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22027     }
22028   }
22029
22030   // Make sure all but the first op are undef or constant.
22031   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22032     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22033            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22034              return Op.isUndef() ||
22035                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22036            });
22037   };
22038
22039   // The following pattern is likely to emerge with vector reduction ops. Moving
22040   // the binary operation ahead of the concat may allow using a narrower vector
22041   // instruction that has better performance than the wide version of the op:
22042   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22043   //   concat (VBinOp X, Y), VecC
22044   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22045       (LHS.hasOneUse() || RHS.hasOneUse())) {
22046     EVT NarrowVT = LHS.getOperand(0).getValueType();
22047     if (NarrowVT == RHS.getOperand(0).getValueType() &&
22048         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22049       SDLoc DL(N);
22050       unsigned NumOperands = LHS.getNumOperands();
22051       SmallVector<SDValue, 4> ConcatOps;
22052       for (unsigned i = 0; i != NumOperands; ++i) {
22053         // This constant fold for operands 1 and up.
22054         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22055                                         RHS.getOperand(i)));
22056       }
22057
22058       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22059     }
22060   }
22061
22062   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22063     return V;
22064
22065   return SDValue();
22066 }
22067
22068 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22069                                     SDValue N2) {
22070   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22071
22072   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22073                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
22074
22075   // If we got a simplified select_cc node back from SimplifySelectCC, then
22076   // break it down into a new SETCC node, and a new SELECT node, and then return
22077   // the SELECT node, since we were called with a SELECT node.
22078   if (SCC.getNode()) {
22079     // Check to see if we got a select_cc back (to turn into setcc/select).
22080     // Otherwise, just return whatever node we got back, like fabs.
22081     if (SCC.getOpcode() == ISD::SELECT_CC) {
22082       const SDNodeFlags Flags = N0.getNode()->getFlags();
22083       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22084                                   N0.getValueType(),
22085                                   SCC.getOperand(0), SCC.getOperand(1),
22086                                   SCC.getOperand(4), Flags);
22087       AddToWorklist(SETCC.getNode());
22088       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22089                                          SCC.getOperand(2), SCC.getOperand(3));
22090       SelectNode->setFlags(Flags);
22091       return SelectNode;
22092     }
22093
22094     return SCC;
22095   }
22096   return SDValue();
22097 }
22098
22099 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22100 /// being selected between, see if we can simplify the select.  Callers of this
22101 /// should assume that TheSelect is deleted if this returns true.  As such, they
22102 /// should return the appropriate thing (e.g. the node) back to the top-level of
22103 /// the DAG combiner loop to avoid it being looked at.
22104 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22105                                     SDValue RHS) {
22106   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22107   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22108   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22109     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22110       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22111       SDValue Sqrt = RHS;
22112       ISD::CondCode CC;
22113       SDValue CmpLHS;
22114       const ConstantFPSDNode *Zero = nullptr;
22115
22116       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22117         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22118         CmpLHS = TheSelect->getOperand(0);
22119         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22120       } else {
22121         // SELECT or VSELECT
22122         SDValue Cmp = TheSelect->getOperand(0);
22123         if (Cmp.getOpcode() == ISD::SETCC) {
22124           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22125           CmpLHS = Cmp.getOperand(0);
22126           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22127         }
22128       }
22129       if (Zero && Zero->isZero() &&
22130           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22131           CC == ISD::SETULT || CC == ISD::SETLT)) {
22132         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22133         CombineTo(TheSelect, Sqrt);
22134         return true;
22135       }
22136     }
22137   }
22138   // Cannot simplify select with vector condition
22139   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22140
22141   // If this is a select from two identical things, try to pull the operation
22142   // through the select.
22143   if (LHS.getOpcode() != RHS.getOpcode() ||
22144       !LHS.hasOneUse() || !RHS.hasOneUse())
22145     return false;
22146
22147   // If this is a load and the token chain is identical, replace the select
22148   // of two loads with a load through a select of the address to load from.
22149   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22150   // constants have been dropped into the constant pool.
22151   if (LHS.getOpcode() == ISD::LOAD) {
22152     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22153     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22154
22155     // Token chains must be identical.
22156     if (LHS.getOperand(0) != RHS.getOperand(0) ||
22157         // Do not let this transformation reduce the number of volatile loads.
22158         // Be conservative for atomics for the moment
22159         // TODO: This does appear to be legal for unordered atomics (see D66309)
22160         !LLD->isSimple() || !RLD->isSimple() ||
22161         // FIXME: If either is a pre/post inc/dec load,
22162         // we'd need to split out the address adjustment.
22163         LLD->isIndexed() || RLD->isIndexed() ||
22164         // If this is an EXTLOAD, the VT's must match.
22165         LLD->getMemoryVT() != RLD->getMemoryVT() ||
22166         // If this is an EXTLOAD, the kind of extension must match.
22167         (LLD->getExtensionType() != RLD->getExtensionType() &&
22168          // The only exception is if one of the extensions is anyext.
22169          LLD->getExtensionType() != ISD::EXTLOAD &&
22170          RLD->getExtensionType() != ISD::EXTLOAD) ||
22171         // FIXME: this discards src value information.  This is
22172         // over-conservative. It would be beneficial to be able to remember
22173         // both potential memory locations.  Since we are discarding
22174         // src value info, don't do the transformation if the memory
22175         // locations are not in the default address space.
22176         LLD->getPointerInfo().getAddrSpace() != 0 ||
22177         RLD->getPointerInfo().getAddrSpace() != 0 ||
22178         // We can't produce a CMOV of a TargetFrameIndex since we won't
22179         // generate the address generation required.
22180         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22181         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22182         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22183                                       LLD->getBasePtr().getValueType()))
22184       return false;
22185
22186     // The loads must not depend on one another.
22187     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22188       return false;
22189
22190     // Check that the select condition doesn't reach either load.  If so,
22191     // folding this will induce a cycle into the DAG.  If not, this is safe to
22192     // xform, so create a select of the addresses.
22193
22194     SmallPtrSet<const SDNode *, 32> Visited;
22195     SmallVector<const SDNode *, 16> Worklist;
22196
22197     // Always fail if LLD and RLD are not independent. TheSelect is a
22198     // predecessor to all Nodes in question so we need not search past it.
22199
22200     Visited.insert(TheSelect);
22201     Worklist.push_back(LLD);
22202     Worklist.push_back(RLD);
22203
22204     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22205         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22206       return false;
22207
22208     SDValue Addr;
22209     if (TheSelect->getOpcode() == ISD::SELECT) {
22210       // We cannot do this optimization if any pair of {RLD, LLD} is a
22211       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22212       // Loads, we only need to check if CondNode is a successor to one of the
22213       // loads. We can further avoid this if there's no use of their chain
22214       // value.
22215       SDNode *CondNode = TheSelect->getOperand(0).getNode();
22216       Worklist.push_back(CondNode);
22217
22218       if ((LLD->hasAnyUseOfValue(1) &&
22219            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22220           (RLD->hasAnyUseOfValue(1) &&
22221            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22222         return false;
22223
22224       Addr = DAG.getSelect(SDLoc(TheSelect),
22225                            LLD->getBasePtr().getValueType(),
22226                            TheSelect->getOperand(0), LLD->getBasePtr(),
22227                            RLD->getBasePtr());
22228     } else {  // Otherwise SELECT_CC
22229       // We cannot do this optimization if any pair of {RLD, LLD} is a
22230       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22231       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22232       // one of the loads. We can further avoid this if there's no use of their
22233       // chain value.
22234
22235       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22236       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22237       Worklist.push_back(CondLHS);
22238       Worklist.push_back(CondRHS);
22239
22240       if ((LLD->hasAnyUseOfValue(1) &&
22241            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22242           (RLD->hasAnyUseOfValue(1) &&
22243            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22244         return false;
22245
22246       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22247                          LLD->getBasePtr().getValueType(),
22248                          TheSelect->getOperand(0),
22249                          TheSelect->getOperand(1),
22250                          LLD->getBasePtr(), RLD->getBasePtr(),
22251                          TheSelect->getOperand(4));
22252     }
22253
22254     SDValue Load;
22255     // It is safe to replace the two loads if they have different alignments,
22256     // but the new load must be the minimum (most restrictive) alignment of the
22257     // inputs.
22258     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22259     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22260     if (!RLD->isInvariant())
22261       MMOFlags &= ~MachineMemOperand::MOInvariant;
22262     if (!RLD->isDereferenceable())
22263       MMOFlags &= ~MachineMemOperand::MODereferenceable;
22264     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22265       // FIXME: Discards pointer and AA info.
22266       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22267                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22268                          MMOFlags);
22269     } else {
22270       // FIXME: Discards pointer and AA info.
22271       Load = DAG.getExtLoad(
22272           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22273                                                   : LLD->getExtensionType(),
22274           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22275           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22276     }
22277
22278     // Users of the select now use the result of the load.
22279     CombineTo(TheSelect, Load);
22280
22281     // Users of the old loads now use the new load's chain.  We know the
22282     // old-load value is dead now.
22283     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22284     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22285     return true;
22286   }
22287
22288   return false;
22289 }
22290
22291 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22292 /// bitwise 'and'.
22293 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22294                                             SDValue N1, SDValue N2, SDValue N3,
22295                                             ISD::CondCode CC) {
22296   // If this is a select where the false operand is zero and the compare is a
22297   // check of the sign bit, see if we can perform the "gzip trick":
22298   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22299   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22300   EVT XType = N0.getValueType();
22301   EVT AType = N2.getValueType();
22302   if (!isNullConstant(N3) || !XType.bitsGE(AType))
22303     return SDValue();
22304
22305   // If the comparison is testing for a positive value, we have to invert
22306   // the sign bit mask, so only do that transform if the target has a bitwise
22307   // 'and not' instruction (the invert is free).
22308   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22309     // (X > -1) ? A : 0
22310     // (X >  0) ? X : 0 <-- This is canonical signed max.
22311     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22312       return SDValue();
22313   } else if (CC == ISD::SETLT) {
22314     // (X <  0) ? A : 0
22315     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
22316     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22317       return SDValue();
22318   } else {
22319     return SDValue();
22320   }
22321
22322   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22323   // constant.
22324   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22325   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22326   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22327     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22328     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22329       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22330       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22331       AddToWorklist(Shift.getNode());
22332
22333       if (XType.bitsGT(AType)) {
22334         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22335         AddToWorklist(Shift.getNode());
22336       }
22337
22338       if (CC == ISD::SETGT)
22339         Shift = DAG.getNOT(DL, Shift, AType);
22340
22341       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22342     }
22343   }
22344
22345   unsigned ShCt = XType.getSizeInBits() - 1;
22346   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22347     return SDValue();
22348
22349   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22350   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22351   AddToWorklist(Shift.getNode());
22352
22353   if (XType.bitsGT(AType)) {
22354     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22355     AddToWorklist(Shift.getNode());
22356   }
22357
22358   if (CC == ISD::SETGT)
22359     Shift = DAG.getNOT(DL, Shift, AType);
22360
22361   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22362 }
22363
22364 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22365 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22366   SDValue N0 = N->getOperand(0);
22367   SDValue N1 = N->getOperand(1);
22368   SDValue N2 = N->getOperand(2);
22369   EVT VT = N->getValueType(0);
22370   SDLoc DL(N);
22371
22372   unsigned BinOpc = N1.getOpcode();
22373   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22374     return SDValue();
22375
22376   if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
22377     return SDValue();
22378
22379   // Fold select(cond, binop(x, y), binop(z, y))
22380   //  --> binop(select(cond, x, z), y)
22381   if (N1.getOperand(1) == N2.getOperand(1)) {
22382     SDValue NewSel =
22383         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22384     SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
22385     NewBinOp->setFlags(N1->getFlags());
22386     NewBinOp->intersectFlagsWith(N2->getFlags());
22387     return NewBinOp;
22388   }
22389
22390   // Fold select(cond, binop(x, y), binop(x, z))
22391   //  --> binop(x, select(cond, y, z))
22392   // Second op VT might be different (e.g. shift amount type)
22393   if (N1.getOperand(0) == N2.getOperand(0) &&
22394       VT == N1.getOperand(1).getValueType() &&
22395       VT == N2.getOperand(1).getValueType()) {
22396     SDValue NewSel =
22397         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22398     SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
22399     NewBinOp->setFlags(N1->getFlags());
22400     NewBinOp->intersectFlagsWith(N2->getFlags());
22401     return NewBinOp;
22402   }
22403
22404   // TODO: Handle isCommutativeBinOp patterns as well?
22405   return SDValue();
22406 }
22407
22408 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22409 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22410   SDValue N0 = N->getOperand(0);
22411   EVT VT = N->getValueType(0);
22412   bool IsFabs = N->getOpcode() == ISD::FABS;
22413   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22414
22415   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22416     return SDValue();
22417
22418   SDValue Int = N0.getOperand(0);
22419   EVT IntVT = Int.getValueType();
22420
22421   // The operand to cast should be integer.
22422   if (!IntVT.isInteger() || IntVT.isVector())
22423     return SDValue();
22424
22425   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22426   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22427   APInt SignMask;
22428   if (N0.getValueType().isVector()) {
22429     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22430     // 0x7f...) per element and splat it.
22431     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22432     if (IsFabs)
22433       SignMask = ~SignMask;
22434     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22435   } else {
22436     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22437     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22438     if (IsFabs)
22439       SignMask = ~SignMask;
22440   }
22441   SDLoc DL(N0);
22442   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22443                     DAG.getConstant(SignMask, DL, IntVT));
22444   AddToWorklist(Int.getNode());
22445   return DAG.getBitcast(VT, Int);
22446 }
22447
22448 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
22449 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22450 /// in it. This may be a win when the constant is not otherwise available
22451 /// because it replaces two constant pool loads with one.
22452 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22453     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22454     ISD::CondCode CC) {
22455   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22456     return SDValue();
22457
22458   // If we are before legalize types, we want the other legalization to happen
22459   // first (for example, to avoid messing with soft float).
22460   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22461   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22462   EVT VT = N2.getValueType();
22463   if (!TV || !FV || !TLI.isTypeLegal(VT))
22464     return SDValue();
22465
22466   // If a constant can be materialized without loads, this does not make sense.
22467   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22468       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22469       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22470     return SDValue();
22471
22472   // If both constants have multiple uses, then we won't need to do an extra
22473   // load. The values are likely around in registers for other users.
22474   if (!TV->hasOneUse() && !FV->hasOneUse())
22475     return SDValue();
22476
22477   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22478                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22479   Type *FPTy = Elts[0]->getType();
22480   const DataLayout &TD = DAG.getDataLayout();
22481
22482   // Create a ConstantArray of the two constants.
22483   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22484   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22485                                       TD.getPrefTypeAlign(FPTy));
22486   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22487
22488   // Get offsets to the 0 and 1 elements of the array, so we can select between
22489   // them.
22490   SDValue Zero = DAG.getIntPtrConstant(0, DL);
22491   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22492   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22493   SDValue Cond =
22494       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22495   AddToWorklist(Cond.getNode());
22496   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22497   AddToWorklist(CstOffset.getNode());
22498   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22499   AddToWorklist(CPIdx.getNode());
22500   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22501                      MachinePointerInfo::getConstantPool(
22502                          DAG.getMachineFunction()), Alignment);
22503 }
22504
22505 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22506 /// where 'cond' is the comparison specified by CC.
22507 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22508                                       SDValue N2, SDValue N3, ISD::CondCode CC,
22509                                       bool NotExtCompare) {
22510   // (x ? y : y) -> y.
22511   if (N2 == N3) return N2;
22512
22513   EVT CmpOpVT = N0.getValueType();
22514   EVT CmpResVT = getSetCCResultType(CmpOpVT);
22515   EVT VT = N2.getValueType();
22516   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22517   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22518   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22519
22520   // Determine if the condition we're dealing with is constant.
22521   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22522     AddToWorklist(SCC.getNode());
22523     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22524       // fold select_cc true, x, y -> x
22525       // fold select_cc false, x, y -> y
22526       return !(SCCC->isNullValue()) ? N2 : N3;
22527     }
22528   }
22529
22530   if (SDValue V =
22531           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22532     return V;
22533
22534   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22535     return V;
22536
22537   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22538   // where y is has a single bit set.
22539   // A plaintext description would be, we can turn the SELECT_CC into an AND
22540   // when the condition can be materialized as an all-ones register.  Any
22541   // single bit-test can be materialized as an all-ones register with
22542   // shift-left and shift-right-arith.
22543   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22544       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22545     SDValue AndLHS = N0->getOperand(0);
22546     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22547     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22548       // Shift the tested bit over the sign bit.
22549       const APInt &AndMask = ConstAndRHS->getAPIntValue();
22550       unsigned ShCt = AndMask.getBitWidth() - 1;
22551       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22552         SDValue ShlAmt =
22553           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22554                           getShiftAmountTy(AndLHS.getValueType()));
22555         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22556
22557         // Now arithmetic right shift it all the way over, so the result is
22558         // either all-ones, or zero.
22559         SDValue ShrAmt =
22560           DAG.getConstant(ShCt, SDLoc(Shl),
22561                           getShiftAmountTy(Shl.getValueType()));
22562         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22563
22564         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22565       }
22566     }
22567   }
22568
22569   // fold select C, 16, 0 -> shl C, 4
22570   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22571   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22572
22573   if ((Fold || Swap) &&
22574       TLI.getBooleanContents(CmpOpVT) ==
22575           TargetLowering::ZeroOrOneBooleanContent &&
22576       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22577
22578     if (Swap) {
22579       CC = ISD::getSetCCInverse(CC, CmpOpVT);
22580       std::swap(N2C, N3C);
22581     }
22582
22583     // If the caller doesn't want us to simplify this into a zext of a compare,
22584     // don't do it.
22585     if (NotExtCompare && N2C->isOne())
22586       return SDValue();
22587
22588     SDValue Temp, SCC;
22589     // zext (setcc n0, n1)
22590     if (LegalTypes) {
22591       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22592       if (VT.bitsLT(SCC.getValueType()))
22593         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22594       else
22595         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22596     } else {
22597       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22598       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22599     }
22600
22601     AddToWorklist(SCC.getNode());
22602     AddToWorklist(Temp.getNode());
22603
22604     if (N2C->isOne())
22605       return Temp;
22606
22607     unsigned ShCt = N2C->getAPIntValue().logBase2();
22608     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22609       return SDValue();
22610
22611     // shl setcc result by log2 n2c
22612     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22613                        DAG.getConstant(ShCt, SDLoc(Temp),
22614                                        getShiftAmountTy(Temp.getValueType())));
22615   }
22616
22617   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22618   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22619   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22620   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22621   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22622   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22623   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22624   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22625   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22626     SDValue ValueOnZero = N2;
22627     SDValue Count = N3;
22628     // If the condition is NE instead of E, swap the operands.
22629     if (CC == ISD::SETNE)
22630       std::swap(ValueOnZero, Count);
22631     // Check if the value on zero is a constant equal to the bits in the type.
22632     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22633       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22634         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22635         // legal, combine to just cttz.
22636         if ((Count.getOpcode() == ISD::CTTZ ||
22637              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22638             N0 == Count.getOperand(0) &&
22639             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22640           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22641         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22642         // legal, combine to just ctlz.
22643         if ((Count.getOpcode() == ISD::CTLZ ||
22644              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22645             N0 == Count.getOperand(0) &&
22646             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22647           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22648       }
22649     }
22650   }
22651
22652   return SDValue();
22653 }
22654
22655 /// This is a stub for TargetLowering::SimplifySetCC.
22656 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22657                                    ISD::CondCode Cond, const SDLoc &DL,
22658                                    bool foldBooleans) {
22659   TargetLowering::DAGCombinerInfo
22660     DagCombineInfo(DAG, Level, false, this);
22661   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22662 }
22663
22664 /// Given an ISD::SDIV node expressing a divide by constant, return
22665 /// a DAG expression to select that will generate the same value by multiplying
22666 /// by a magic number.
22667 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22668 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22669   // when optimising for minimum size, we don't want to expand a div to a mul
22670   // and a shift.
22671   if (DAG.getMachineFunction().getFunction().hasMinSize())
22672     return SDValue();
22673
22674   SmallVector<SDNode *, 8> Built;
22675   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22676     for (SDNode *N : Built)
22677       AddToWorklist(N);
22678     return S;
22679   }
22680
22681   return SDValue();
22682 }
22683
22684 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22685 /// DAG expression that will generate the same value by right shifting.
22686 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22687   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22688   if (!C)
22689     return SDValue();
22690
22691   // Avoid division by zero.
22692   if (C->isNullValue())
22693     return SDValue();
22694
22695   SmallVector<SDNode *, 8> Built;
22696   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22697     for (SDNode *N : Built)
22698       AddToWorklist(N);
22699     return S;
22700   }
22701
22702   return SDValue();
22703 }
22704
22705 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22706 /// expression that will generate the same value by multiplying by a magic
22707 /// number.
22708 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22709 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22710   // when optimising for minimum size, we don't want to expand a div to a mul
22711   // and a shift.
22712   if (DAG.getMachineFunction().getFunction().hasMinSize())
22713     return SDValue();
22714
22715   SmallVector<SDNode *, 8> Built;
22716   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22717     for (SDNode *N : Built)
22718       AddToWorklist(N);
22719     return S;
22720   }
22721
22722   return SDValue();
22723 }
22724
22725 /// Determines the LogBase2 value for a non-null input value using the
22726 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22727 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22728   EVT VT = V.getValueType();
22729   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22730   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22731   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22732   return LogBase2;
22733 }
22734
22735 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22736 /// For the reciprocal, we need to find the zero of the function:
22737 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
22738 ///     =>
22739 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22740 ///     does not require additional intermediate precision]
22741 /// For the last iteration, put numerator N into it to gain more precision:
22742 ///   Result = N X_i + X_i (N - N A X_i)
22743 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22744                                       SDNodeFlags Flags) {
22745   if (LegalDAG)
22746     return SDValue();
22747
22748   // TODO: Handle half and/or extended types?
22749   EVT VT = Op.getValueType();
22750   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22751     return SDValue();
22752
22753   // If estimates are explicitly disabled for this function, we're done.
22754   MachineFunction &MF = DAG.getMachineFunction();
22755   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22756   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22757     return SDValue();
22758
22759   // Estimates may be explicitly enabled for this type with a custom number of
22760   // refinement steps.
22761   int Iterations = TLI.getDivRefinementSteps(VT, MF);
22762   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22763     AddToWorklist(Est.getNode());
22764
22765     SDLoc DL(Op);
22766     if (Iterations) {
22767       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22768
22769       // Newton iterations: Est = Est + Est (N - Arg * Est)
22770       // If this is the last iteration, also multiply by the numerator.
22771       for (int i = 0; i < Iterations; ++i) {
22772         SDValue MulEst = Est;
22773
22774         if (i == Iterations - 1) {
22775           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22776           AddToWorklist(MulEst.getNode());
22777         }
22778
22779         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22780         AddToWorklist(NewEst.getNode());
22781
22782         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22783                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22784         AddToWorklist(NewEst.getNode());
22785
22786         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22787         AddToWorklist(NewEst.getNode());
22788
22789         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22790         AddToWorklist(Est.getNode());
22791       }
22792     } else {
22793       // If no iterations are available, multiply with N.
22794       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22795       AddToWorklist(Est.getNode());
22796     }
22797
22798     return Est;
22799   }
22800
22801   return SDValue();
22802 }
22803
22804 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22805 /// For the reciprocal sqrt, we need to find the zero of the function:
22806 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22807 ///     =>
22808 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22809 /// As a result, we precompute A/2 prior to the iteration loop.
22810 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22811                                          unsigned Iterations,
22812                                          SDNodeFlags Flags, bool Reciprocal) {
22813   EVT VT = Arg.getValueType();
22814   SDLoc DL(Arg);
22815   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22816
22817   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22818   // this entire sequence requires only one FP constant.
22819   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22820   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22821
22822   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22823   for (unsigned i = 0; i < Iterations; ++i) {
22824     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22825     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22826     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22827     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22828   }
22829
22830   // If non-reciprocal square root is requested, multiply the result by Arg.
22831   if (!Reciprocal)
22832     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22833
22834   return Est;
22835 }
22836
22837 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22838 /// For the reciprocal sqrt, we need to find the zero of the function:
22839 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22840 ///     =>
22841 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
22842 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22843                                          unsigned Iterations,
22844                                          SDNodeFlags Flags, bool Reciprocal) {
22845   EVT VT = Arg.getValueType();
22846   SDLoc DL(Arg);
22847   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22848   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22849
22850   // This routine must enter the loop below to work correctly
22851   // when (Reciprocal == false).
22852   assert(Iterations > 0);
22853
22854   // Newton iterations for reciprocal square root:
22855   // E = (E * -0.5) * ((A * E) * E + -3.0)
22856   for (unsigned i = 0; i < Iterations; ++i) {
22857     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22858     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22859     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22860
22861     // When calculating a square root at the last iteration build:
22862     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22863     // (notice a common subexpression)
22864     SDValue LHS;
22865     if (Reciprocal || (i + 1) < Iterations) {
22866       // RSQRT: LHS = (E * -0.5)
22867       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22868     } else {
22869       // SQRT: LHS = (A * E) * -0.5
22870       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22871     }
22872
22873     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22874   }
22875
22876   return Est;
22877 }
22878
22879 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22880 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22881 /// Op can be zero.
22882 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22883                                            bool Reciprocal) {
22884   if (LegalDAG)
22885     return SDValue();
22886
22887   // TODO: Handle half and/or extended types?
22888   EVT VT = Op.getValueType();
22889   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22890     return SDValue();
22891
22892   // If estimates are explicitly disabled for this function, we're done.
22893   MachineFunction &MF = DAG.getMachineFunction();
22894   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22895   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22896     return SDValue();
22897
22898   // Estimates may be explicitly enabled for this type with a custom number of
22899   // refinement steps.
22900   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22901
22902   bool UseOneConstNR = false;
22903   if (SDValue Est =
22904       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22905                           Reciprocal)) {
22906     AddToWorklist(Est.getNode());
22907
22908     if (Iterations)
22909       Est = UseOneConstNR
22910             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22911             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22912     if (!Reciprocal) {
22913       SDLoc DL(Op);
22914       // Try the target specific test first.
22915       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
22916
22917       // The estimate is now completely wrong if the input was exactly 0.0 or
22918       // possibly a denormal. Force the answer to 0.0 or value provided by
22919       // target for those cases.
22920       Est = DAG.getNode(
22921           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22922           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22923     }
22924     return Est;
22925   }
22926
22927   return SDValue();
22928 }
22929
22930 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22931   return buildSqrtEstimateImpl(Op, Flags, true);
22932 }
22933
22934 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22935   return buildSqrtEstimateImpl(Op, Flags, false);
22936 }
22937
22938 /// Return true if there is any possibility that the two addresses overlap.
22939 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
22940
22941   struct MemUseCharacteristics {
22942     bool IsVolatile;
22943     bool IsAtomic;
22944     SDValue BasePtr;
22945     int64_t Offset;
22946     Optional<int64_t> NumBytes;
22947     MachineMemOperand *MMO;
22948   };
22949
22950   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
22951     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
22952       int64_t Offset = 0;
22953       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
22954         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
22955                      ? C->getSExtValue()
22956                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
22957                            ? -1 * C->getSExtValue()
22958                            : 0;
22959       uint64_t Size =
22960           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22961       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22962               Offset /*base offset*/,
22963               Optional<int64_t>(Size),
22964               LSN->getMemOperand()};
22965     }
22966     if (const auto *LN = cast<LifetimeSDNode>(N))
22967       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
22968               (LN->hasOffset()) ? LN->getOffset() : 0,
22969               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22970                                 : Optional<int64_t>(),
22971               (MachineMemOperand *)nullptr};
22972     // Default.
22973     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
22974             (int64_t)0 /*offset*/,
22975             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
22976   };
22977
22978   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22979                         MUC1 = getCharacteristics(Op1);
22980
22981   // If they are to the same address, then they must be aliases.
22982   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22983       MUC0.Offset == MUC1.Offset)
22984     return true;
22985
22986   // If they are both volatile then they cannot be reordered.
22987   if (MUC0.IsVolatile && MUC1.IsVolatile)
22988     return true;
22989
22990   // Be conservative about atomics for the moment
22991   // TODO: This is way overconservative for unordered atomics (see D66309)
22992   if (MUC0.IsAtomic && MUC1.IsAtomic)
22993     return true;
22994
22995   if (MUC0.MMO && MUC1.MMO) {
22996     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22997         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22998       return false;
22999   }
23000
23001   // Try to prove that there is aliasing, or that there is no aliasing. Either
23002   // way, we can return now. If nothing can be proved, proceed with more tests.
23003   bool IsAlias;
23004   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23005                                        DAG, IsAlias))
23006     return IsAlias;
23007
23008   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23009   // either are not known.
23010   if (!MUC0.MMO || !MUC1.MMO)
23011     return true;
23012
23013   // If one operation reads from invariant memory, and the other may store, they
23014   // cannot alias. These should really be checking the equivalent of mayWrite,
23015   // but it only matters for memory nodes other than load /store.
23016   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23017       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23018     return false;
23019
23020   // If we know required SrcValue1 and SrcValue2 have relatively large
23021   // alignment compared to the size and offset of the access, we may be able
23022   // to prove they do not alias. This check is conservative for now to catch
23023   // cases created by splitting vector types, it only works when the offsets are
23024   // multiples of the size of the data.
23025   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23026   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23027   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23028   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23029   auto &Size0 = MUC0.NumBytes;
23030   auto &Size1 = MUC1.NumBytes;
23031   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23032       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23033       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23034       SrcValOffset1 % *Size1 == 0) {
23035     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23036     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23037
23038     // There is no overlap between these relatively aligned accesses of
23039     // similar size. Return no alias.
23040     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23041       return false;
23042   }
23043
23044   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23045                    ? CombinerGlobalAA
23046                    : DAG.getSubtarget().useAA();
23047 #ifndef NDEBUG
23048   if (CombinerAAOnlyFunc.getNumOccurrences() &&
23049       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23050     UseAA = false;
23051 #endif
23052
23053   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23054       Size0.hasValue() && Size1.hasValue()) {
23055     // Use alias analysis information.
23056     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23057     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23058     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23059     if (AA->isNoAlias(
23060             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23061                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23062             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23063                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23064       return false;
23065   }
23066
23067   // Otherwise we have to assume they alias.
23068   return true;
23069 }
23070
23071 /// Walk up chain skipping non-aliasing memory nodes,
23072 /// looking for aliasing nodes and adding them to the Aliases vector.
23073 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23074                                    SmallVectorImpl<SDValue> &Aliases) {
23075   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
23076   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
23077
23078   // Get alias information for node.
23079   // TODO: relax aliasing for unordered atomics (see D66309)
23080   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23081
23082   // Starting off.
23083   Chains.push_back(OriginalChain);
23084   unsigned Depth = 0;
23085
23086   // Attempt to improve chain by a single step
23087   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23088     switch (C.getOpcode()) {
23089     case ISD::EntryToken:
23090       // No need to mark EntryToken.
23091       C = SDValue();
23092       return true;
23093     case ISD::LOAD:
23094     case ISD::STORE: {
23095       // Get alias information for C.
23096       // TODO: Relax aliasing for unordered atomics (see D66309)
23097       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23098                       cast<LSBaseSDNode>(C.getNode())->isSimple();
23099       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
23100         // Look further up the chain.
23101         C = C.getOperand(0);
23102         return true;
23103       }
23104       // Alias, so stop here.
23105       return false;
23106     }
23107
23108     case ISD::CopyFromReg:
23109       // Always forward past past CopyFromReg.
23110       C = C.getOperand(0);
23111       return true;
23112
23113     case ISD::LIFETIME_START:
23114     case ISD::LIFETIME_END: {
23115       // We can forward past any lifetime start/end that can be proven not to
23116       // alias the memory access.
23117       if (!isAlias(N, C.getNode())) {
23118         // Look further up the chain.
23119         C = C.getOperand(0);
23120         return true;
23121       }
23122       return false;
23123     }
23124     default:
23125       return false;
23126     }
23127   };
23128
23129   // Look at each chain and determine if it is an alias.  If so, add it to the
23130   // aliases list.  If not, then continue up the chain looking for the next
23131   // candidate.
23132   while (!Chains.empty()) {
23133     SDValue Chain = Chains.pop_back_val();
23134
23135     // Don't bother if we've seen Chain before.
23136     if (!Visited.insert(Chain.getNode()).second)
23137       continue;
23138
23139     // For TokenFactor nodes, look at each operand and only continue up the
23140     // chain until we reach the depth limit.
23141     //
23142     // FIXME: The depth check could be made to return the last non-aliasing
23143     // chain we found before we hit a tokenfactor rather than the original
23144     // chain.
23145     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23146       Aliases.clear();
23147       Aliases.push_back(OriginalChain);
23148       return;
23149     }
23150
23151     if (Chain.getOpcode() == ISD::TokenFactor) {
23152       // We have to check each of the operands of the token factor for "small"
23153       // token factors, so we queue them up.  Adding the operands to the queue
23154       // (stack) in reverse order maintains the original order and increases the
23155       // likelihood that getNode will find a matching token factor (CSE.)
23156       if (Chain.getNumOperands() > 16) {
23157         Aliases.push_back(Chain);
23158         continue;
23159       }
23160       for (unsigned n = Chain.getNumOperands(); n;)
23161         Chains.push_back(Chain.getOperand(--n));
23162       ++Depth;
23163       continue;
23164     }
23165     // Everything else
23166     if (ImproveChain(Chain)) {
23167       // Updated Chain Found, Consider new chain if one exists.
23168       if (Chain.getNode())
23169         Chains.push_back(Chain);
23170       ++Depth;
23171       continue;
23172     }
23173     // No Improved Chain Possible, treat as Alias.
23174     Aliases.push_back(Chain);
23175   }
23176 }
23177
23178 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
23179 /// (aliasing node.)
23180 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23181   if (OptLevel == CodeGenOpt::None)
23182     return OldChain;
23183
23184   // Ops for replacing token factor.
23185   SmallVector<SDValue, 8> Aliases;
23186
23187   // Accumulate all the aliases to this node.
23188   GatherAllAliases(N, OldChain, Aliases);
23189
23190   // If no operands then chain to entry token.
23191   if (Aliases.size() == 0)
23192     return DAG.getEntryNode();
23193
23194   // If a single operand then chain to it.  We don't need to revisit it.
23195   if (Aliases.size() == 1)
23196     return Aliases[0];
23197
23198   // Construct a custom tailored token factor.
23199   return DAG.getTokenFactor(SDLoc(N), Aliases);
23200 }
23201
namespace {
// Empty value type in which all instances compare equal; used as the mapped
// value of the IntervalMap in parallelizeChainedStores.
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
23208
23209 // This function tries to collect a bunch of potentially interesting
23210 // nodes to improve the chains of, all at once. This might seem
23211 // redundant, as this function gets called when visiting every store
23212 // node, so why not let the work be done on each store as it's visited?
23213 //
23214 // I believe this is mainly important because mergeConsecutiveStores
23215 // is unable to deal with merging stores of different sizes, so unless
23216 // we improve the chains of all the potential candidates up-front
23217 // before running mergeConsecutiveStores, it might only see some of
23218 // the nodes that will eventually be candidates, and then not be able
23219 // to go from a partially-merged state to the desired final
23220 // fully-merged state.
23221
23222 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23223   SmallVector<StoreSDNode *, 8> ChainedStores;
23224   StoreSDNode *STChain = St;
23225   // Intervals records which offsets from BaseIndex have been covered. In
23226   // the common case, every store writes to the immediately previous address
23227   // space and thus merged with the previous interval at insertion time.
23228
23229   using IMap =
23230       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23231   IMap::Allocator A;
23232   IMap Intervals(A);
23233
23234   // This holds the base pointer, index, and the offset in bytes from the base
23235   // pointer.
23236   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23237
23238   // We must have a base and an offset.
23239   if (!BasePtr.getBase().getNode())
23240     return false;
23241
23242   // Do not handle stores to undef base pointers.
23243   if (BasePtr.getBase().isUndef())
23244     return false;
23245
23246   // BaseIndexOffset assumes that offsets are fixed-size, which
23247   // is not valid for scalable vectors where the offsets are
23248   // scaled by `vscale`, so bail out early.
23249   if (St->getMemoryVT().isScalableVector())
23250     return false;
23251
23252   // Add ST's interval.
23253   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23254
23255   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23256     if (Chain->getMemoryVT().isScalableVector())
23257       return false;
23258
23259     // If the chain has more than one use, then we can't reorder the mem ops.
23260     if (!SDValue(Chain, 0)->hasOneUse())
23261       break;
23262     // TODO: Relax for unordered atomics (see D66309)
23263     if (!Chain->isSimple() || Chain->isIndexed())
23264       break;
23265
23266     // Find the base pointer and offset for this memory node.
23267     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23268     // Check that the base pointer is the same as the original one.
23269     int64_t Offset;
23270     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23271       break;
23272     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23273     // Make sure we don't overlap with other intervals by checking the ones to
23274     // the left or right before inserting.
23275     auto I = Intervals.find(Offset);
23276     // If there's a next interval, we should end before it.
23277     if (I != Intervals.end() && I.start() < (Offset + Length))
23278       break;
23279     // If there's a previous interval, we should start after it.
23280     if (I != Intervals.begin() && (--I).stop() <= Offset)
23281       break;
23282     Intervals.insert(Offset, Offset + Length, Unit);
23283
23284     ChainedStores.push_back(Chain);
23285     STChain = Chain;
23286   }
23287
23288   // If we didn't find a chained store, exit.
23289   if (ChainedStores.size() == 0)
23290     return false;
23291
23292   // Improve all chained stores (St and ChainedStores members) starting from
23293   // where the store chain ended and return single TokenFactor.
23294   SDValue NewChain = STChain->getChain();
23295   SmallVector<SDValue, 8> TFOps;
23296   for (unsigned I = ChainedStores.size(); I;) {
23297     StoreSDNode *S = ChainedStores[--I];
23298     SDValue BetterChain = FindBetterChain(S, NewChain);
23299     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23300         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23301     TFOps.push_back(SDValue(S, 0));
23302     ChainedStores[I] = S;
23303   }
23304
23305   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23306   SDValue BetterChain = FindBetterChain(St, NewChain);
23307   SDValue NewST;
23308   if (St->isTruncatingStore())
23309     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23310                               St->getBasePtr(), St->getMemoryVT(),
23311                               St->getMemOperand());
23312   else
23313     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23314                          St->getBasePtr(), St->getMemOperand());
23315
23316   TFOps.push_back(NewST);
23317
23318   // If we improved every element of TFOps, then we've lost the dependence on
23319   // NewChain to successors of St and we need to add it back to TFOps. Do so at
23320   // the beginning to keep relative order consistent with FindBetterChains.
23321   auto hasImprovedChain = [&](SDValue ST) -> bool {
23322     return ST->getOperand(0) != NewChain;
23323   };
23324   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23325   if (AddNewChain)
23326     TFOps.insert(TFOps.begin(), NewChain);
23327
23328   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23329   CombineTo(St, TF);
23330
23331   // Add TF and its operands to the worklist.
23332   AddToWorklist(TF.getNode());
23333   for (const SDValue &Op : TF->ops())
23334     AddToWorklist(Op.getNode());
23335   AddToWorklist(STChain);
23336   return true;
23337 }
23338
23339 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23340   if (OptLevel == CodeGenOpt::None)
23341     return false;
23342
23343   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23344
23345   // We must have a base and an offset.
23346   if (!BasePtr.getBase().getNode())
23347     return false;
23348
23349   // Do not handle stores to undef base pointers.
23350   if (BasePtr.getBase().isUndef())
23351     return false;
23352
23353   // Directly improve a chain of disjoint stores starting at St.
23354   if (parallelizeChainedStores(St))
23355     return true;
23356
23357   // Improve St's Chain..
23358   SDValue BetterChain = FindBetterChain(St, St->getChain());
23359   if (St->getChain() != BetterChain) {
23360     replaceStoreChain(St, BetterChain);
23361     return true;
23362   }
23363   return false;
23364 }
23365
23366 /// This is the entry point for the file.
23367 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23368                            CodeGenOpt::Level OptLevel) {
23369   /// This is the main entry point to this class.
23370   DAGCombiner(*this, AA, OptLevel).Run(Level);
23371 }