llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallBitVector.h"
  28 #include "llvm/ADT/SmallPtrSet.h"
  29 #include "llvm/ADT/SmallSet.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/ADT/Statistic.h"
  32 #include "llvm/Analysis/AliasAnalysis.h"
  33 #include "llvm/Analysis/MemoryLocation.h"
  34 #include "llvm/Analysis/TargetLibraryInfo.h"
  35 #include "llvm/Analysis/VectorUtils.h"
  36 #include "llvm/CodeGen/DAGCombine.h"
  37 #include "llvm/CodeGen/ISDOpcodes.h"
  38 #include "llvm/CodeGen/MachineFrameInfo.h"
  39 #include "llvm/CodeGen/MachineFunction.h"
  40 #include "llvm/CodeGen/MachineMemOperand.h"
  41 #include "llvm/CodeGen/RuntimeLibcalls.h"
  42 #include "llvm/CodeGen/SelectionDAG.h"
  43 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  44 #include "llvm/CodeGen/SelectionDAGNodes.h"
  45 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  46 #include "llvm/CodeGen/TargetLowering.h"
  47 #include "llvm/CodeGen/TargetRegisterInfo.h"
  48 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  49 #include "llvm/CodeGen/ValueTypes.h"
  50 #include "llvm/IR/Attributes.h"
  51 #include "llvm/IR/Constant.h"
  52 #include "llvm/IR/DataLayout.h"
  53 #include "llvm/IR/DerivedTypes.h"
  54 #include "llvm/IR/Function.h"
  55 #include "llvm/IR/LLVMContext.h"
  56 #include "llvm/IR/Metadata.h"
  57 #include "llvm/Support/Casting.h"
  58 #include "llvm/Support/CodeGen.h"
  59 #include "llvm/Support/CommandLine.h"
  60 #include "llvm/Support/Compiler.h"
  61 #include "llvm/Support/Debug.h"
  62 #include "llvm/Support/ErrorHandling.h"
  63 #include "llvm/Support/KnownBits.h"
  64 #include "llvm/Support/MachineValueType.h"
  65 #include "llvm/Support/MathExtras.h"
  66 #include "llvm/Support/raw_ostream.h"
  67 #include "llvm/Target/TargetMachine.h"
  68 #include "llvm/Target/TargetOptions.h"
  69 #include <algorithm>
  70 #include <cassert>
  71 #include <cstdint>
  72 #include <functional>
  73 #include <iterator>
  74 #include <string>
  75 #include <tuple>
  76 #include <utility>
  77
  78 using namespace llvm;
  79
  80 #define DEBUG_TYPE "dagcombine"
  81
  82 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  83 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  84 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  85 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  86 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  87 STATISTIC(SlicedLoads, "Number of load sliced");
  88 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  89
  90 static cl::opt<bool>
  91 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  92                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  93
  94 static cl::opt<bool>
  95 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  96         cl::desc("Enable DAG combiner's use of TBAA"));
  97
  98 #ifndef NDEBUG
  99 static cl::opt<std::string>
 100 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
 101                    cl::desc("Only use DAG-combiner alias analysis in this"
 102                             " function"));
 103 #endif
 104
 105 /// Hidden option to stress test load slicing, i.e., when this option
 106 /// is enabled, load slicing bypasses most of its profitability guards.
 107 static cl::opt<bool>
 108 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 109                   cl::desc("Bypass the profitability model of load slicing"),
 110                   cl::init(false));
 111
 112 static cl::opt<bool>
 113   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 114                     cl::desc("DAG combiner may split indexing from loads"));
 115
 116 static cl::opt<bool>
 117     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
 118                        cl::desc("DAG combiner enable merging multiple stores "
 119                                 "into a wider store"));
 120
 121 static cl::opt<unsigned> TokenFactorInlineLimit(
 122     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 123     cl::desc("Limit the number of operands to inline for Token Factors"));
 124
 125 static cl::opt<unsigned> StoreMergeDependenceLimit(
 126     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
 127     cl::desc("Limit the number of times for the same StoreNode and RootNode "
 128              "to bail out in store merging dependence check"));
 129
 130 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
 131     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
 132     cl::desc("DAG cominber enable reducing the width of load/op/store "
 133              "sequence"));
 134
 135 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
 136     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
 137     cl::desc("DAG cominber enable load/<replace bytes>/store with "
 138              "a narrower store"));
 139
 140 namespace {
 141
 142   class DAGCombiner {
 143     SelectionDAG &DAG;
 144     const TargetLowering &TLI;
 145     const SelectionDAGTargetInfo *STI;
 146     CombineLevel Level;
 147     CodeGenOpt::Level OptLevel;
 148     bool LegalDAG = false;
 149     bool LegalOperations = false;
 150     bool LegalTypes = false;
 151     bool ForCodeSize;
 152     bool DisableGenericCombines;
 153
 154     /// Worklist of all of the nodes that need to be simplified.
 155     ///
 156     /// This must behave as a stack -- new nodes to process are pushed onto the
 157     /// back and when processing we pop off of the back.
 158     ///
 159     /// The worklist will not contain duplicates but may contain null entries
 160     /// due to nodes being deleted from the underlying DAG.
 161     SmallVector<SDNode *, 64> Worklist;
 162
 163     /// Mapping from an SDNode to its position on the worklist.
 164     ///
 165     /// This is used to find and remove nodes from the worklist (by nulling
 166     /// them) when they are deleted from the underlying DAG. It relies on
 167     /// stable indices of nodes within the worklist.
 168     DenseMap<SDNode *, unsigned> WorklistMap;
  169     /// This records all nodes that we attempted to add to the worklist since
  170     /// we last considered a new worklist entry. As we do not add duplicate
  171     /// nodes to the worklist, this is different from the tail of the worklist.
 172     SmallSetVector<SDNode *, 32> PruningList;
 173
 174     /// Set of nodes which have been combined (at least once).
 175     ///
 176     /// This is used to allow us to reliably add any operands of a DAG node
 177     /// which have not yet been combined to the worklist.
 178     SmallPtrSet<SDNode *, 32> CombinedNodes;
 179
 180     /// Map from candidate StoreNode to the pair of RootNode and count.
 181     /// The count is used to track how many times we have seen the StoreNode
 182     /// with the same RootNode bail out in dependence check. If we have seen
 183     /// the bail out for the same pair many times over a limit, we won't
 184     /// consider the StoreNode with the same RootNode as store merging
 185     /// candidate again.
 186     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
 187
 188     // AA - Used for DAG load/store alias analysis.
 189     AliasAnalysis *AA;
 190
 191     /// When an instruction is simplified, add all users of the instruction to
 192     /// the work lists because they might get more simplified now.
 193     void AddUsersToWorklist(SDNode *N) {
 194       for (SDNode *Node : N->uses())
 195         AddToWorklist(Node);
 196     }
 197
  198     /// Shorthand to add a node and all of its users to the worklist.
  199     void AddToWorklistWithUsers(SDNode *N) {
  200       AddUsersToWorklist(N);
  201       AddToWorklist(N); // N itself is added after its users.
  202     }
 203
 204     // Prune potentially dangling nodes. This is called after
 205     // any visit to a node, but should also be called during a visit after any
 206     // failed combine which may have created a DAG node.
 207     void clearAddedDanglingWorklistEntries() {
 208       // Check any nodes added to the worklist to see if they are prunable.
 209       while (!PruningList.empty()) {
 210         auto *N = PruningList.pop_back_val();
 211         if (N->use_empty())
 212           recursivelyDeleteUnusedNodes(N);
 213       }
 214     }
 215
 216     SDNode *getNextWorklistEntry() {
 217       // Before we do any work, remove nodes that are not in use.
 218       clearAddedDanglingWorklistEntries();
 219       SDNode *N = nullptr;
 220       // The Worklist holds the SDNodes in order, but it may contain null
 221       // entries.
 222       while (!N && !Worklist.empty()) {
 223         N = Worklist.pop_back_val();
 224       }
 225
 226       if (N) {
 227         bool GoodWorklistEntry = WorklistMap.erase(N);
 228         (void)GoodWorklistEntry;
 229         assert(GoodWorklistEntry &&
 230                "Found a worklist entry without a corresponding map entry!");
 231       }
 232       return N;
 233     }
 234
 235     /// Call the node-specific routine that folds each particular type of node.
 236     SDValue visit(SDNode *N);
 237
 238   public:
  239     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
  240         : DAG(D), TLI(D.getTargetLoweringInfo()),
  241           STI(D.getSubtarget().getSelectionDAGInfo()),
  242           Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
  243       ForCodeSize = DAG.shouldOptForSize();
  244       DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
  245       // Compute MaximumLegalStoreInBits as the largest legal value-type size.
  246       MaximumLegalStoreInBits = 0;
  247       // We use the known minimum size here, since that's all we can guarantee
  248       // for the scalable vector types. MVT::Other is never considered.
  249       for (MVT VT : MVT::all_valuetypes())
  250         if (EVT(VT).isSimple() && VT != MVT::Other &&
  251             TLI.isTypeLegal(EVT(VT)) &&
  252             VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
  253           MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
  254     }
 255
  256     void ConsiderForPruning(SDNode *N) {
  257       // Record N in PruningList; it is deleted later if found to have no uses.
  258       PruningList.insert(N);
  259     }
 260
 261     /// Add to the worklist making sure its instance is at the back (next to be
 262     /// processed.)
 263     void AddToWorklist(SDNode *N) {
 264       assert(N->getOpcode() != ISD::DELETED_NODE &&
 265              "Deleted Node added to Worklist");
 266
 267       // Skip handle nodes as they can't usefully be combined and confuse the
 268       // zero-use deletion strategy.
 269       if (N->getOpcode() == ISD::HANDLENODE)
 270         return;
 271
 272       ConsiderForPruning(N);
 273
 274       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 275         Worklist.push_back(N);
 276     }
 277
 278     /// Remove all instances of N from the worklist.
 279     void removeFromWorklist(SDNode *N) {
 280       CombinedNodes.erase(N);
 281       PruningList.remove(N);
 282       StoreRootCountMap.erase(N);
 283
 284       auto It = WorklistMap.find(N);
 285       if (It == WorklistMap.end())
 286         return; // Not in the worklist.
 287
 288       // Null out the entry rather than erasing it to avoid a linear operation.
 289       Worklist[It->second] = nullptr;
 290       WorklistMap.erase(It);
 291     }
 292
 293     void deleteAndRecombine(SDNode *N);
 294     bool recursivelyDeleteUnusedNodes(SDNode *N);
 295
 296     /// Replaces all uses of the results of one DAG node with new values.
 297     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 298                       bool AddTo = true);
 299
  300     /// Convenience overload of CombineTo for a single replacement value, Res.
  301     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
  302       return CombineTo(N, &Res, 1, AddTo);
  303     }
 304
 305     /// Replaces all uses of the results of one DAG node with new values.
 306     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 307                       bool AddTo = true) {
 308       SDValue To[] = { Res0, Res1 };
 309       return CombineTo(N, To, 2, AddTo);
 310     }
 311
 312     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 313
 314   private:
 315     unsigned MaximumLegalStoreInBits;
 316
 317     /// Check the specified integer node value to see if it can be simplified or
 318     /// if things it uses can be simplified by bit propagation.
 319     /// If so, return true.
 320     bool SimplifyDemandedBits(SDValue Op) {
 321       unsigned BitWidth = Op.getScalarValueSizeInBits();
 322       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
 323       return SimplifyDemandedBits(Op, DemandedBits);
 324     }
 325
 326     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 327       TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
 328       KnownBits Known;
 329       if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
 330         return false;
 331
 332       // Revisit the node.
 333       AddToWorklist(Op.getNode());
 334
 335       CommitTargetLoweringOpt(TLO);
 336       return true;
 337     }
 338
 339     /// Check the specified vector node value to see if it can be simplified or
 340     /// if things it uses can be simplified as it only uses some of the
 341     /// elements. If so, return true.
 342     bool SimplifyDemandedVectorElts(SDValue Op) {
 343       // TODO: For now just pretend it cannot be simplified.
 344       if (Op.getValueType().isScalableVector())
 345         return false;
 346
 347       unsigned NumElts = Op.getValueType().getVectorNumElements();
 348       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 349       return SimplifyDemandedVectorElts(Op, DemandedElts);
 350     }
 351
 352     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 353                               const APInt &DemandedElts,
 354                               bool AssumeSingleUse = false);
 355     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 356                                     bool AssumeSingleUse = false);
 357
 358     bool CombineToPreIndexedLoadStore(SDNode *N);
 359     bool CombineToPostIndexedLoadStore(SDNode *N);
 360     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 361     bool SliceUpLoad(SDNode *N);
 362
 363     // Scalars have size 0 to distinguish from singleton vectors.
 364     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 365     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 366     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 367
 368     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 369     ///   load.
 370     ///
 371     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 372     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 373     /// \param EltNo index of the vector element to load.
 374     /// \param OriginalLoad load that EVE came from to be replaced.
 375     /// \returns EVE on success SDValue() on failure.
 376     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 377                                          SDValue EltNo,
 378                                          LoadSDNode *OriginalLoad);
 379     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 380     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 381     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 382     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 383     SDValue PromoteIntBinOp(SDValue Op);
 384     SDValue PromoteIntShiftOp(SDValue Op);
 385     SDValue PromoteExtend(SDValue Op);
 386     bool PromoteLoad(SDValue Op);
 387
 388     /// Call the node-specific routine that knows how to fold each
 389     /// particular type of node. If that doesn't do anything, try the
 390     /// target-specific DAG combines.
 391     SDValue combine(SDNode *N);
 392
 393     // Visitation implementation - Implement dag node combining for different
 394     // node types.  The semantics are as follows:
 395     // Return Value:
 396     //   SDValue.getNode() == 0 - No change was made
 397     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 398     //   otherwise              - N should be replaced by the returned Operand.
 399     //
 400     SDValue visitTokenFactor(SDNode *N);
 401     SDValue visitMERGE_VALUES(SDNode *N);
 402     SDValue visitADD(SDNode *N);
 403     SDValue visitADDLike(SDNode *N);
 404     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 405     SDValue visitSUB(SDNode *N);
 406     SDValue visitADDSAT(SDNode *N);
 407     SDValue visitSUBSAT(SDNode *N);
 408     SDValue visitADDC(SDNode *N);
 409     SDValue visitADDO(SDNode *N);
 410     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 411     SDValue visitSUBC(SDNode *N);
 412     SDValue visitSUBO(SDNode *N);
 413     SDValue visitADDE(SDNode *N);
 414     SDValue visitADDCARRY(SDNode *N);
 415     SDValue visitSADDO_CARRY(SDNode *N);
 416     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 417     SDValue visitSUBE(SDNode *N);
 418     SDValue visitSUBCARRY(SDNode *N);
 419     SDValue visitSSUBO_CARRY(SDNode *N);
 420     SDValue visitMUL(SDNode *N);
 421     SDValue visitMULFIX(SDNode *N);
 422     SDValue useDivRem(SDNode *N);
 423     SDValue visitSDIV(SDNode *N);
 424     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 425     SDValue visitUDIV(SDNode *N);
 426     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 427     SDValue visitREM(SDNode *N);
 428     SDValue visitMULHU(SDNode *N);
 429     SDValue visitMULHS(SDNode *N);
 430     SDValue visitSMUL_LOHI(SDNode *N);
 431     SDValue visitUMUL_LOHI(SDNode *N);
 432     SDValue visitMULO(SDNode *N);
 433     SDValue visitIMINMAX(SDNode *N);
 434     SDValue visitAND(SDNode *N);
 435     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 436     SDValue visitOR(SDNode *N);
 437     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 438     SDValue visitXOR(SDNode *N);
 439     SDValue SimplifyVBinOp(SDNode *N);
 440     SDValue visitSHL(SDNode *N);
 441     SDValue visitSRA(SDNode *N);
 442     SDValue visitSRL(SDNode *N);
 443     SDValue visitFunnelShift(SDNode *N);
 444     SDValue visitRotate(SDNode *N);
 445     SDValue visitABS(SDNode *N);
 446     SDValue visitBSWAP(SDNode *N);
 447     SDValue visitBITREVERSE(SDNode *N);
 448     SDValue visitCTLZ(SDNode *N);
 449     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 450     SDValue visitCTTZ(SDNode *N);
 451     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 452     SDValue visitCTPOP(SDNode *N);
 453     SDValue visitSELECT(SDNode *N);
 454     SDValue visitVSELECT(SDNode *N);
 455     SDValue visitSELECT_CC(SDNode *N);
 456     SDValue visitSETCC(SDNode *N);
 457     SDValue visitSETCCCARRY(SDNode *N);
 458     SDValue visitSIGN_EXTEND(SDNode *N);
 459     SDValue visitZERO_EXTEND(SDNode *N);
 460     SDValue visitANY_EXTEND(SDNode *N);
 461     SDValue visitAssertExt(SDNode *N);
 462     SDValue visitAssertAlign(SDNode *N);
 463     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 464     SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
 465     SDValue visitTRUNCATE(SDNode *N);
 466     SDValue visitBITCAST(SDNode *N);
 467     SDValue visitFREEZE(SDNode *N);
 468     SDValue visitBUILD_PAIR(SDNode *N);
 469     SDValue visitFADD(SDNode *N);
 470     SDValue visitSTRICT_FADD(SDNode *N);
 471     SDValue visitFSUB(SDNode *N);
 472     SDValue visitFMUL(SDNode *N);
 473     SDValue visitFMA(SDNode *N);
 474     SDValue visitFDIV(SDNode *N);
 475     SDValue visitFREM(SDNode *N);
 476     SDValue visitFSQRT(SDNode *N);
 477     SDValue visitFCOPYSIGN(SDNode *N);
 478     SDValue visitFPOW(SDNode *N);
 479     SDValue visitSINT_TO_FP(SDNode *N);
 480     SDValue visitUINT_TO_FP(SDNode *N);
 481     SDValue visitFP_TO_SINT(SDNode *N);
 482     SDValue visitFP_TO_UINT(SDNode *N);
 483     SDValue visitFP_ROUND(SDNode *N);
 484     SDValue visitFP_EXTEND(SDNode *N);
 485     SDValue visitFNEG(SDNode *N);
 486     SDValue visitFABS(SDNode *N);
 487     SDValue visitFCEIL(SDNode *N);
 488     SDValue visitFTRUNC(SDNode *N);
 489     SDValue visitFFLOOR(SDNode *N);
 490     SDValue visitFMINNUM(SDNode *N);
 491     SDValue visitFMAXNUM(SDNode *N);
 492     SDValue visitFMINIMUM(SDNode *N);
 493     SDValue visitFMAXIMUM(SDNode *N);
 494     SDValue visitBRCOND(SDNode *N);
 495     SDValue visitBR_CC(SDNode *N);
 496     SDValue visitLOAD(SDNode *N);
 497
 498     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 499     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 500
 501     SDValue visitSTORE(SDNode *N);
 502     SDValue visitLIFETIME_END(SDNode *N);
 503     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 504     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 505     SDValue visitBUILD_VECTOR(SDNode *N);
 506     SDValue visitCONCAT_VECTORS(SDNode *N);
 507     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 508     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 509     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 510     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 511     SDValue visitMLOAD(SDNode *N);
 512     SDValue visitMSTORE(SDNode *N);
 513     SDValue visitMGATHER(SDNode *N);
 514     SDValue visitMSCATTER(SDNode *N);
 515     SDValue visitFP_TO_FP16(SDNode *N);
 516     SDValue visitFP16_TO_FP(SDNode *N);
 517     SDValue visitVECREDUCE(SDNode *N);
 518
 519     SDValue visitFADDForFMACombine(SDNode *N);
 520     SDValue visitFSUBForFMACombine(SDNode *N);
 521     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 522
 523     SDValue XformToShuffleWithZero(SDNode *N);
 524     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 525                                                     const SDLoc &DL, SDValue N0,
 526                                                     SDValue N1);
 527     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 528                                       SDValue N1);
 529     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 530                            SDValue N1, SDNodeFlags Flags);
 531
 532     SDValue visitShiftByConstant(SDNode *N);
 533
 534     SDValue foldSelectOfConstants(SDNode *N);
 535     SDValue foldVSelectOfConstants(SDNode *N);
 536     SDValue foldBinOpIntoSelect(SDNode *BO);
 537     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 538     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 539     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 540     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 541                              SDValue N2, SDValue N3, ISD::CondCode CC,
 542                              bool NotExtCompare = false);
 543     SDValue convertSelectOfFPConstantsToLoadOffset(
 544         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 545         ISD::CondCode CC);
 546     SDValue foldSignChangeInBitcast(SDNode *N);
 547     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 548                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 549     SDValue foldSelectOfBinops(SDNode *N);
 550     SDValue foldSextSetcc(SDNode *N);
 551     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 552                               const SDLoc &DL);
 553     SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
 554     SDValue unfoldMaskedMerge(SDNode *N);
 555     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 556     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 557                           const SDLoc &DL, bool foldBooleans);
 558     SDValue rebuildSetCC(SDValue N);
 559
 560     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 561                            SDValue &CC, bool MatchStrict = false) const;
 562     bool isOneUseSetCC(SDValue N) const;
 563
 564     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 565                                          unsigned HiOp);
 566     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 567     SDValue CombineExtLoad(SDNode *N);
 568     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 569     SDValue combineRepeatedFPDivisors(SDNode *N);
 570     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 571     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 572     SDValue BuildSDIV(SDNode *N);
 573     SDValue BuildSDIVPow2(SDNode *N);
 574     SDValue BuildUDIV(SDNode *N);
 575     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 576     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
 577     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 578     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 579     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 580     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 581                                 SDNodeFlags Flags, bool Reciprocal);
 582     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 583                                 SDNodeFlags Flags, bool Reciprocal);
 584     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 585                                bool DemandHighBits = true);
 586     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 587     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 588                               SDValue InnerPos, SDValue InnerNeg,
 589                               unsigned PosOpcode, unsigned NegOpcode,
 590                               const SDLoc &DL);
 591     SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
 592                               SDValue InnerPos, SDValue InnerNeg,
 593                               unsigned PosOpcode, unsigned NegOpcode,
 594                               const SDLoc &DL);
 595     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 596     SDValue MatchLoadCombine(SDNode *N);
 597     SDValue mergeTruncStores(StoreSDNode *N);
 598     SDValue ReduceLoadWidth(SDNode *N);
 599     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 600     SDValue splitMergedValStore(StoreSDNode *ST);
 601     SDValue TransformFPLoadStorePair(SDNode *N);
 602     SDValue convertBuildVecZextToZext(SDNode *N);
 603     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 604     SDValue reduceBuildVecTruncToBitCast(SDNode *N);
 605     SDValue reduceBuildVecToShuffle(SDNode *N);
 606     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 607                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 608                                   SDValue VecIn2, unsigned LeftIdx,
 609                                   bool DidSplitVec);
 610     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 611
 612     /// Walk up chain skipping non-aliasing memory nodes,
 613     /// looking for aliasing nodes and adding them to the Aliases vector.
 614     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 615                           SmallVectorImpl<SDValue> &Aliases);
 616
 617     /// Return true if there is any possibility that the two addresses overlap.
 618     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 619
 620     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 621     /// chain (aliasing node.)
 622     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 623
 624     /// Try to replace a store and any possibly adjacent stores on
 625     /// consecutive chains with better chains. Return true only if St is
 626     /// replaced.
 627     ///
 628     /// Notice that other chains may still be replaced even if the function
 629     /// returns false.
 630     bool findBetterNeighborChains(StoreSDNode *St);
 631
 632     // Helper for findBetterNeighborChains. Walk up store chain add additional
 633     // chained stores that do not overlap and can be parallelized.
 634     bool parallelizeChainedStores(StoreSDNode *St);
 635
  636     /// Holds a pointer to an LSBaseSDNode as well as information on where it
  637     /// is located in a sequence of memory operations connected by a chain.
  638     struct MemOpLink {
  639       // The load/store node this link refers to.
  640       LSBaseSDNode *MemNode;
  641
  642       // Offset from the base ptr (presumably in bytes; confirm at use sites).
  643       int64_t OffsetFromBase;
  644       // Stores both arguments as given; no validation is performed here.
  645       MemOpLink(LSBaseSDNode *N, int64_t Offset)
  646           : MemNode(N), OffsetFromBase(Offset) {}
  647     };
 648
 649     // Classify the origin of a stored value.
 650     enum class StoreSource { Unknown, Constant, Extract, Load };
 651     StoreSource getStoreSource(SDValue StoreVal) {
 652       switch (StoreVal.getOpcode()) {
 653       case ISD::Constant:
 654       case ISD::ConstantFP:
 655         return StoreSource::Constant;
 656       case ISD::EXTRACT_VECTOR_ELT:
 657       case ISD::EXTRACT_SUBVECTOR:
 658         return StoreSource::Extract;
 659       case ISD::LOAD:
 660         return StoreSource::Load;
 661       default:
 662         return StoreSource::Unknown;
 663       }
 664     }
 665
 666     /// This is a helper function for visitMUL to check the profitability
 667     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 668     /// MulNode is the original multiply, AddNode is (add x, c1),
 669     /// and ConstNode is c2.
 670     bool isMulAddWithConstProfitable(SDNode *MulNode,
 671                                      SDValue &AddNode,
 672                                      SDValue &ConstNode);
 673
 674     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 675     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 676     /// the type of the loaded value to be extended.
 677     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 678                           EVT LoadResultTy, EVT &ExtVT);
 679
 680     /// Helper function to calculate whether the given Load/Store can have its
 681     /// width reduced to ExtVT.
 682     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 683                            EVT &MemVT, unsigned ShAmt = 0);
 684
    /// Used by BackwardsPropagateMask to find suitable loads.
    /// NOTE(review): \p Loads, \p NodesWithConsts and \p NodeToMask are taken
    /// by reference and look like outputs of the search -- confirm against the
    /// definition.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);
 692
    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with \p St are placed in \p StoreNodes.
    /// \p Root is a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. \p RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// \return 0 if there are not at least 2 consecutive stores to try
    /// merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);
 753
    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if they are not met an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
 761
 762     /// Helper function to determine whether the target supports operation
 763     /// given by \p Opcode for type \p VT, that is, whether the operation
 764     /// is legal or custom before legalizing operations, and whether is
 765     /// legal (but not custom) after legalization.
 766     bool hasOperation(unsigned Opcode, EVT VT) {
 767       return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
 768     }
 769
 770   public:
    /// Runs the dag combiner on all nodes in the work list.
    void Run(CombineLevel AtLevel);

    /// Accessor for the combiner's SelectionDAG member.
    SelectionDAG &getDAG() const { return DAG; }
 775
 776     /// Returns a type large enough to hold any valid shift amount - before type
 777     /// legalization these can be huge.
 778     EVT getShiftAmountTy(EVT LHSTy) {
 779       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 780       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 781     }
 782
 783     /// This method returns true if we are running before type legalization or
 784     /// if the specified VT is legal.
 785     bool isTypeLegal(const EVT &VT) {
 786       if (!LegalTypes) return true;
 787       return TLI.isTypeLegal(VT);
 788     }
 789
 790     /// Convenience wrapper around TargetLowering::getSetCCResultType
 791     EVT getSetCCResultType(EVT VT) const {
 792       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 793     }
 794
    /// NOTE(review): the definition is outside this excerpt. Going by the
    /// signature, this presumably rewrites the nodes in \p SetCCs that use
    /// \p OrigLoad so that they use \p ExtLoad instead, extending their other
    /// operands with \p ExtType -- confirm against the definition.
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
 798   };
 799
 800 /// This class is a DAGUpdateListener that removes any deleted
 801 /// nodes from the worklist.
 802 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 803   DAGCombiner &DC;
 804
 805 public:
 806   explicit WorklistRemover(DAGCombiner &dc)
 807     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 808
 809   void NodeDeleted(SDNode *N, SDNode *E) override {
 810     DC.removeFromWorklist(N);
 811   }
 812 };
 813
 814 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 815   DAGCombiner &DC;
 816
 817 public:
 818   explicit WorklistInserter(DAGCombiner &dc)
 819       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 820
 821   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 822   //        compile time costs in large DAGs, e.g. Halide.
 823   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 824 };
 825
 826 } // end anonymous namespace
 827
 828 //===----------------------------------------------------------------------===//
 829 //  TargetLowering::DAGCombinerInfo implementation
 830 //===----------------------------------------------------------------------===//
 831
 832 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 833   ((DAGCombiner*)DC)->AddToWorklist(N);
 834 }
 835
 836 SDValue TargetLowering::DAGCombinerInfo::
 837 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 838   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 839 }
 840
 841 SDValue TargetLowering::DAGCombinerInfo::
 842 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 843   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 844 }
 845
 846 SDValue TargetLowering::DAGCombinerInfo::
 847 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 848   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 849 }
 850
 851 bool TargetLowering::DAGCombinerInfo::
 852 recursivelyDeleteUnusedNodes(SDNode *N) {
 853   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 854 }
 855
 856 void TargetLowering::DAGCombinerInfo::
 857 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 858   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 859 }
 860
 861 //===----------------------------------------------------------------------===//
 862 // Helper Functions
 863 //===----------------------------------------------------------------------===//
 864
 865 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 866   removeFromWorklist(N);
 867
 868   // If the operands of this node are only used by the node, they will now be
 869   // dead. Make sure to re-visit them and recursively delete dead nodes.
 870   for (const SDValue &Op : N->ops())
 871     // For an operand generating multiple values, one of the values may
 872     // become dead allowing further simplification (e.g. split index
 873     // arithmetic from an indexed load).
 874     if (Op->hasOneUse() || Op->getNumValues() > 1)
 875       AddToWorklist(Op.getNode());
 876
 877   DAG.DeleteNode(N);
 878 }
 879
 880 // APInts must be the same size for most operations, this helper
 881 // function zero extends the shorter of the pair so that they match.
 882 // We provide an Offset so that we can create bitwidths that won't overflow.
 883 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 884   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 885   LHS = LHS.zextOrSelf(Bits);
 886   RHS = RHS.zextOrSelf(Bits);
 887 }
 888
 889 // Return true if this node is a setcc, or is a select_cc
 890 // that selects between the target values used for true and false, making it
 891 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 892 // the appropriate nodes based on the type of node we are checking. This
 893 // simplifies life a bit for the callers.
 894 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 895                                     SDValue &CC, bool MatchStrict) const {
 896   if (N.getOpcode() == ISD::SETCC) {
 897     LHS = N.getOperand(0);
 898     RHS = N.getOperand(1);
 899     CC  = N.getOperand(2);
 900     return true;
 901   }
 902
 903   if (MatchStrict &&
 904       (N.getOpcode() == ISD::STRICT_FSETCC ||
 905        N.getOpcode() == ISD::STRICT_FSETCCS)) {
 906     LHS = N.getOperand(1);
 907     RHS = N.getOperand(2);
 908     CC  = N.getOperand(3);
 909     return true;
 910   }
 911
 912   if (N.getOpcode() != ISD::SELECT_CC ||
 913       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 914       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 915     return false;
 916
 917   if (TLI.getBooleanContents(N.getValueType()) ==
 918       TargetLowering::UndefinedBooleanContent)
 919     return false;
 920
 921   LHS = N.getOperand(0);
 922   RHS = N.getOperand(1);
 923   CC  = N.getOperand(4);
 924   return true;
 925 }
 926
 927 /// Return true if this is a SetCC-equivalent operation with only one use.
 928 /// If this is true, it allows the users to invert the operation for free when
 929 /// it is profitable to do so.
 930 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 931   SDValue N0, N1, N2;
 932   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 933     return true;
 934   return false;
 935 }
 936
 937 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
 938   if (!ScalarTy.isSimple())
 939     return false;
 940
 941   uint64_t MaskForTy = 0ULL;
 942   switch (ScalarTy.getSimpleVT().SimpleTy) {
 943   case MVT::i8:
 944     MaskForTy = 0xFFULL;
 945     break;
 946   case MVT::i16:
 947     MaskForTy = 0xFFFFULL;
 948     break;
 949   case MVT::i32:
 950     MaskForTy = 0xFFFFFFFFULL;
 951     break;
 952   default:
 953     return false;
 954     break;
 955   }
 956
 957   APInt Val;
 958   if (ISD::isConstantSplatVector(N, Val))
 959     return Val.getLimitedValue() == MaskForTy;
 960
 961   return false;
 962 }
 963
 964 // Determines if it is a constant integer or a splat/build vector of constant
 965 // integers (and undefs).
 966 // Do not permit build vector implicit truncation.
 967 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 968   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 969     return !(Const->isOpaque() && NoOpaques);
 970   if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
 971     return false;
 972   unsigned BitWidth = N.getScalarValueSizeInBits();
 973   for (const SDValue &Op : N->op_values()) {
 974     if (Op.isUndef())
 975       continue;
 976     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 977     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 978         (Const->isOpaque() && NoOpaques))
 979       return false;
 980   }
 981   return true;
 982 }
 983
 984 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 985 // undef's.
 986 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 987   if (V.getOpcode() != ISD::BUILD_VECTOR)
 988     return false;
 989   return isConstantOrConstantVector(V, NoOpaques) ||
 990          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 991 }
 992
 993 // Determine if this an indexed load with an opaque target constant index.
 994 static bool canSplitIdx(LoadSDNode *LD) {
 995   return MaySplitLoadIndex &&
 996          (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
 997           !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
 998 }
 999
1000 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1001                                                              const SDLoc &DL,
1002                                                              SDValue N0,
1003                                                              SDValue N1) {
1004   // Currently this only tries to ensure we don't undo the GEP splits done by
1005   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1006   // we check if the following transformation would be problematic:
1007   // (load/store (add, (add, x, offset1), offset2)) ->
1008   // (load/store (add, x, offset1+offset2)).
1009
1010   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1011     return false;
1012
1013   if (N0.hasOneUse())
1014     return false;
1015
1016   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1017   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1018   if (!C1 || !C2)
1019     return false;
1020
1021   const APInt &C1APIntVal = C1->getAPIntValue();
1022   const APInt &C2APIntVal = C2->getAPIntValue();
1023   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1024     return false;
1025
1026   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1027   if (CombinedValueIntVal.getBitWidth() > 64)
1028     return false;
1029   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1030
1031   for (SDNode *Node : N0->uses()) {
1032     auto LoadStore = dyn_cast<MemSDNode>(Node);
1033     if (LoadStore) {
1034       // Is x[offset2] already not a legal addressing mode? If so then
1035       // reassociating the constants breaks nothing (we test offset2 because
1036       // that's the one we hope to fold into the load or store).
1037       TargetLoweringBase::AddrMode AM;
1038       AM.HasBaseReg = true;
1039       AM.BaseOffs = C2APIntVal.getSExtValue();
1040       EVT VT = LoadStore->getMemoryVT();
1041       unsigned AS = LoadStore->getAddressSpace();
1042       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1043       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1044         continue;
1045
1046       // Would x[offset1+offset2] still be a legal addressing mode?
1047       AM.BaseOffs = CombinedValue;
1048       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1049         return true;
1050     }
1051   }
1052
1053   return false;
1054 }
1055
1056 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1057 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1058 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1059                                                SDValue N0, SDValue N1) {
1060   EVT VT = N0.getValueType();
1061
1062   if (N0.getOpcode() != Opc)
1063     return SDValue();
1064
1065   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1066     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1067       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1068       if (SDValue OpNode =
1069               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1070         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1071       return SDValue();
1072     }
1073     if (N0.hasOneUse()) {
1074       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1075       //              iff (op x, c1) has one use
1076       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1077       if (!OpNode.getNode())
1078         return SDValue();
1079       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1080     }
1081   }
1082   return SDValue();
1083 }
1084
1085 // Try to reassociate commutative binops.
1086 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1087                                     SDValue N1, SDNodeFlags Flags) {
1088   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1089
1090   // Floating-point reassociation is not allowed without loose FP math.
1091   if (N0.getValueType().isFloatingPoint() ||
1092       N1.getValueType().isFloatingPoint())
1093     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1094       return SDValue();
1095
1096   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1097     return Combined;
1098   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1099     return Combined;
1100   return SDValue();
1101 }
1102
1103 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1104                                bool AddTo) {
1105   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1106   ++NodesCombined;
1107   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1108              To[0].getNode()->dump(&DAG);
1109              dbgs() << " and " << NumTo - 1 << " other values\n");
1110   for (unsigned i = 0, e = NumTo; i != e; ++i)
1111     assert((!To[i].getNode() ||
1112             N->getValueType(i) == To[i].getValueType()) &&
1113            "Cannot combine value to value of different type!");
1114
1115   WorklistRemover DeadNodes(*this);
1116   DAG.ReplaceAllUsesWith(N, To);
1117   if (AddTo) {
1118     // Push the new nodes and any users onto the worklist
1119     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1120       if (To[i].getNode()) {
1121         AddToWorklist(To[i].getNode());
1122         AddUsersToWorklist(To[i].getNode());
1123       }
1124     }
1125   }
1126
1127   // Finally, if the node is now dead, remove it from the graph.  The node
1128   // may not be dead if the replacement process recursively simplified to
1129   // something else needing this node.
1130   if (N->use_empty())
1131     deleteAndRecombine(N);
1132   return SDValue(N, 0);
1133 }
1134
1135 void DAGCombiner::
1136 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1137   // Replace the old value with the new one.
1138   ++NodesCombined;
1139   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1140              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1141              dbgs() << '\n');
1142
1143   // Replace all uses.  If any nodes become isomorphic to other nodes and
1144   // are deleted, make sure to remove them from our worklist.
1145   WorklistRemover DeadNodes(*this);
1146   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1147
1148   // Push the new node and any (possibly new) users onto the worklist.
1149   AddToWorklistWithUsers(TLO.New.getNode());
1150
1151   // Finally, if the node is now dead, remove it from the graph.  The node
1152   // may not be dead if the replacement process recursively simplified to
1153   // something else needing this node.
1154   if (TLO.Old.getNode()->use_empty())
1155     deleteAndRecombine(TLO.Old.getNode());
1156 }
1157
1158 /// Check the specified integer node value to see if it can be simplified or if
1159 /// things it uses can be simplified by bit propagation. If so, return true.
1160 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1161                                        const APInt &DemandedElts,
1162                                        bool AssumeSingleUse) {
1163   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1164   KnownBits Known;
1165   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1166                                 AssumeSingleUse))
1167     return false;
1168
1169   // Revisit the node.
1170   AddToWorklist(Op.getNode());
1171
1172   CommitTargetLoweringOpt(TLO);
1173   return true;
1174 }
1175
1176 /// Check the specified vector node value to see if it can be simplified or
1177 /// if things it uses can be simplified as it only uses some of the elements.
1178 /// If so, return true.
1179 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1180                                              const APInt &DemandedElts,
1181                                              bool AssumeSingleUse) {
1182   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1183   APInt KnownUndef, KnownZero;
1184   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1185                                       TLO, 0, AssumeSingleUse))
1186     return false;
1187
1188   // Revisit the node.
1189   AddToWorklist(Op.getNode());
1190
1191   CommitTargetLoweringOpt(TLO);
1192   return true;
1193 }
1194
1195 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1196   SDLoc DL(Load);
1197   EVT VT = Load->getValueType(0);
1198   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1199
1200   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1201              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1202   WorklistRemover DeadNodes(*this);
1203   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1204   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1205   deleteAndRecombine(Load);
1206   AddToWorklist(Trunc.getNode());
1207 }
1208
1209 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1210   Replace = false;
1211   SDLoc DL(Op);
1212   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1213     LoadSDNode *LD = cast<LoadSDNode>(Op);
1214     EVT MemVT = LD->getMemoryVT();
1215     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1216                                                       : LD->getExtensionType();
1217     Replace = true;
1218     return DAG.getExtLoad(ExtType, DL, PVT,
1219                           LD->getChain(), LD->getBasePtr(),
1220                           MemVT, LD->getMemOperand());
1221   }
1222
1223   unsigned Opc = Op.getOpcode();
1224   switch (Opc) {
1225   default: break;
1226   case ISD::AssertSext:
1227     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1228       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1229     break;
1230   case ISD::AssertZext:
1231     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1232       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1233     break;
1234   case ISD::Constant: {
1235     unsigned ExtOpc =
1236       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1237     return DAG.getNode(ExtOpc, DL, PVT, Op);
1238   }
1239   }
1240
1241   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1242     return SDValue();
1243   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1244 }
1245
1246 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1247   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1248     return SDValue();
1249   EVT OldVT = Op.getValueType();
1250   SDLoc DL(Op);
1251   bool Replace = false;
1252   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1253   if (!NewOp.getNode())
1254     return SDValue();
1255   AddToWorklist(NewOp.getNode());
1256
1257   if (Replace)
1258     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1259   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1260                      DAG.getValueType(OldVT));
1261 }
1262
1263 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1264   EVT OldVT = Op.getValueType();
1265   SDLoc DL(Op);
1266   bool Replace = false;
1267   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1268   if (!NewOp.getNode())
1269     return SDValue();
1270   AddToWorklist(NewOp.getNode());
1271
1272   if (Replace)
1273     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1274   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1275 }
1276
1277 /// Promote the specified integer binary operation if the target indicates it is
1278 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1279 /// i32 since i16 instructions are longer.
1280 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1281   if (!LegalOperations)
1282     return SDValue();
1283
1284   EVT VT = Op.getValueType();
1285   if (VT.isVector() || !VT.isInteger())
1286     return SDValue();
1287
1288   // If operation type is 'undesirable', e.g. i16 on x86, consider
1289   // promoting it.
1290   unsigned Opc = Op.getOpcode();
1291   if (TLI.isTypeDesirableForOp(Opc, VT))
1292     return SDValue();
1293
1294   EVT PVT = VT;
1295   // Consult target whether it is a good idea to promote this operation and
1296   // what's the right type to promote it to.
1297   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298     assert(PVT != VT && "Don't know what type to promote to!");
1299
1300     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1301
1302     bool Replace0 = false;
1303     SDValue N0 = Op.getOperand(0);
1304     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1305
1306     bool Replace1 = false;
1307     SDValue N1 = Op.getOperand(1);
1308     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1309     SDLoc DL(Op);
1310
1311     SDValue RV =
1312         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1313
1314     // We are always replacing N0/N1's use in N and only need additional
1315     // replacements if there are additional uses.
1316     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1317     //       (SDValue) here because the node may reference multiple values
1318     //       (for example, the chain value of a load node).
1319     Replace0 &= !N0->hasOneUse();
1320     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1321
1322     // Combine Op here so it is preserved past replacements.
1323     CombineTo(Op.getNode(), RV);
1324
1325     // If operands have a use ordering, make sure we deal with
1326     // predecessor first.
1327     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1328       std::swap(N0, N1);
1329       std::swap(NN0, NN1);
1330     }
1331
1332     if (Replace0) {
1333       AddToWorklist(NN0.getNode());
1334       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1335     }
1336     if (Replace1) {
1337       AddToWorklist(NN1.getNode());
1338       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1339     }
1340     return Op;
1341   }
1342   return SDValue();
1343 }
1344
1345 /// Promote the specified integer shift operation if the target indicates it is
1346 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1347 /// i32 since i16 instructions are longer.
1348 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1349   if (!LegalOperations)
1350     return SDValue();
1351
1352   EVT VT = Op.getValueType();
1353   if (VT.isVector() || !VT.isInteger())
1354     return SDValue();
1355
1356   // If operation type is 'undesirable', e.g. i16 on x86, consider
1357   // promoting it.
1358   unsigned Opc = Op.getOpcode();
1359   if (TLI.isTypeDesirableForOp(Opc, VT))
1360     return SDValue();
1361
1362   EVT PVT = VT;
1363   // Consult target whether it is a good idea to promote this operation and
1364   // what's the right type to promote it to.
1365   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1366     assert(PVT != VT && "Don't know what type to promote to!");
1367
1368     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1369
1370     bool Replace = false;
1371     SDValue N0 = Op.getOperand(0);
1372     SDValue N1 = Op.getOperand(1);
1373     if (Opc == ISD::SRA)
1374       N0 = SExtPromoteOperand(N0, PVT);
1375     else if (Opc == ISD::SRL)
1376       N0 = ZExtPromoteOperand(N0, PVT);
1377     else
1378       N0 = PromoteOperand(N0, PVT, Replace);
1379
1380     if (!N0.getNode())
1381       return SDValue();
1382
1383     SDLoc DL(Op);
1384     SDValue RV =
1385         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1386
1387     if (Replace)
1388       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1389
1390     // Deal with Op being deleted.
1391     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1392       return RV;
1393   }
1394   return SDValue();
1395 }
1396
1397 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1398   if (!LegalOperations)
1399     return SDValue();
1400
1401   EVT VT = Op.getValueType();
1402   if (VT.isVector() || !VT.isInteger())
1403     return SDValue();
1404
1405   // If operation type is 'undesirable', e.g. i16 on x86, consider
1406   // promoting it.
1407   unsigned Opc = Op.getOpcode();
1408   if (TLI.isTypeDesirableForOp(Opc, VT))
1409     return SDValue();
1410
1411   EVT PVT = VT;
1412   // Consult target whether it is a good idea to promote this operation and
1413   // what's the right type to promote it to.
1414   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1415     assert(PVT != VT && "Don't know what type to promote to!");
1416     // fold (aext (aext x)) -> (aext x)
1417     // fold (aext (zext x)) -> (zext x)
1418     // fold (aext (sext x)) -> (sext x)
1419     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1420     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1421   }
1422   return SDValue();
1423 }
1424
1425 bool DAGCombiner::PromoteLoad(SDValue Op) {
1426   if (!LegalOperations)
1427     return false;
1428
1429   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1430     return false;
1431
1432   EVT VT = Op.getValueType();
1433   if (VT.isVector() || !VT.isInteger())
1434     return false;
1435
1436   // If operation type is 'undesirable', e.g. i16 on x86, consider
1437   // promoting it.
1438   unsigned Opc = Op.getOpcode();
1439   if (TLI.isTypeDesirableForOp(Opc, VT))
1440     return false;
1441
1442   EVT PVT = VT;
1443   // Consult target whether it is a good idea to promote this operation and
1444   // what's the right type to promote it to.
1445   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1446     assert(PVT != VT && "Don't know what type to promote to!");
1447
1448     SDLoc DL(Op);
1449     SDNode *N = Op.getNode();
1450     LoadSDNode *LD = cast<LoadSDNode>(N);
1451     EVT MemVT = LD->getMemoryVT();
1452     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1453                                                       : LD->getExtensionType();
1454     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1455                                    LD->getChain(), LD->getBasePtr(),
1456                                    MemVT, LD->getMemOperand());
1457     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1458
1459     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1460                Result.getNode()->dump(&DAG); dbgs() << '\n');
1461     WorklistRemover DeadNodes(*this);
1462     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1463     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1464     deleteAndRecombine(N);
1465     AddToWorklist(Result.getNode());
1466     return true;
1467   }
1468   return false;
1469 }
1470
1471 /// Recursively delete a node which has no uses and any operands for
1472 /// which it is the only use.
1473 ///
1474 /// Note that this both deletes the nodes and removes them from the worklist.
1475 /// It also adds any nodes who have had a user deleted to the worklist as they
1476 /// may now have only one use and subject to other combines.
1477 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1478   if (!N->use_empty())
1479     return false;
1480
1481   SmallSetVector<SDNode *, 16> Nodes;
1482   Nodes.insert(N);
1483   do {
1484     N = Nodes.pop_back_val();
1485     if (!N)
1486       continue;
1487
1488     if (N->use_empty()) {
1489       for (const SDValue &ChildN : N->op_values())
1490         Nodes.insert(ChildN.getNode());
1491
1492       removeFromWorklist(N);
1493       DAG.DeleteNode(N);
1494     } else {
1495       AddToWorklist(N);
1496     }
1497   } while (!Nodes.empty());
1498   return true;
1499 }
1500
1501 //===----------------------------------------------------------------------===//
1502 //  Main DAG Combiner implementation
1503 //===----------------------------------------------------------------------===//
1504
/// Run the combiner over the whole DAG at combine level \p AtLevel.
///
/// Every node is put on the worklist and then processed until the worklist is
/// empty: dead nodes are deleted, nodes are re-legalized when running after
/// DAG legalization, and combine() is asked for a replacement. A replacement
/// takes over all uses of the original node and is queued together with its
/// users. Finally the DAG root is refreshed and dead nodes are removed.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      // Everything legalization touched is queued again, along with its users.
      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      // LegalizeOp reported that N must not be combined any further.
      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

    // With matching result counts the nodes are swapped wholesale; otherwise
    // N must have exactly one result, which is replaced by the value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
1605
/// Dispatch \p N to the generic visit routine for its opcode.
///
/// Several opcodes share one routine (e.g. the saturating adds, the integer
/// min/max family and the vector reductions); those case labels are grouped
/// and fall through to a single call. The result of the routine is returned
/// unchanged. An opcode without an entry takes the default label and yields a
/// null SDValue.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FMINIMUM:           return visitFMINIMUM(N);
  case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
  }
  // No generic combine routine exists for this opcode.
  return SDValue();
}
1744
1745 SDValue DAGCombiner::combine(SDNode *N) {
1746   SDValue RV;
1747   if (!DisableGenericCombines)
1748     RV = visit(N);
1749
1750   // If nothing happened, try a target-specific DAG combine.
1751   if (!RV.getNode()) {
1752     assert(N->getOpcode() != ISD::DELETED_NODE &&
1753            "Node was deleted but visit returned NULL!");
1754
1755     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1756         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1757
1758       // Expose the DAG combiner to the target combiner impls.
1759       TargetLowering::DAGCombinerInfo
1760         DagCombineInfo(DAG, Level, false, this);
1761
1762       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1763     }
1764   }
1765
1766   // If nothing happened still, try promoting the operation.
1767   if (!RV.getNode()) {
1768     switch (N->getOpcode()) {
1769     default: break;
1770     case ISD::ADD:
1771     case ISD::SUB:
1772     case ISD::MUL:
1773     case ISD::AND:
1774     case ISD::OR:
1775     case ISD::XOR:
1776       RV = PromoteIntBinOp(SDValue(N, 0));
1777       break;
1778     case ISD::SHL:
1779     case ISD::SRA:
1780     case ISD::SRL:
1781       RV = PromoteIntShiftOp(SDValue(N, 0));
1782       break;
1783     case ISD::SIGN_EXTEND:
1784     case ISD::ZERO_EXTEND:
1785     case ISD::ANY_EXTEND:
1786       RV = PromoteExtend(SDValue(N, 0));
1787       break;
1788     case ISD::LOAD:
1789       if (PromoteLoad(SDValue(N, 0)))
1790         RV = SDValue(N, 0);
1791       break;
1792     }
1793   }
1794
1795   // If N is a commutative binary node, try to eliminate it if the commuted
1796   // version is already present in the DAG.
1797   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1798       N->getNumValues() == 1) {
1799     SDValue N0 = N->getOperand(0);
1800     SDValue N1 = N->getOperand(1);
1801
1802     // Constant operands are canonicalized to RHS.
1803     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1804       SDValue Ops[] = {N1, N0};
1805       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1806                                             N->getFlags());
1807       if (CSENode)
1808         return SDValue(CSENode, 0);
1809     }
1810   }
1811
1812   return RV;
1813 }
1814
1815 /// Given a node, return its input chain if it has one, otherwise return a null
1816 /// sd operand.
1817 static SDValue getInputChainForNode(SDNode *N) {
1818   if (unsigned NumOps = N->getNumOperands()) {
1819     if (N->getOperand(0).getValueType() == MVT::Other)
1820       return N->getOperand(0);
1821     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1822       return N->getOperand(NumOps-1);
1823     for (unsigned i = 1; i < NumOps-1; ++i)
1824       if (N->getOperand(i).getValueType() == MVT::Other)
1825         return N->getOperand(i);
1826   }
1827   return SDValue();
1828 }
1829
/// Simplify the TokenFactor \p N.
///
/// Handles the two-operand case where one operand's input chain is the other
/// operand, then (outside optnone and within TokenFactorInlineLimit) inlines
/// single-use TokenFactor operands, drops EntryToken and duplicate operands,
/// and prunes operands that are reachable by walking up the chain of another
/// operand.
///
/// \returns the replacement chain, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        // A TokenFactor with other users (or one already queued) is treated
        // like any other operand.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Nodes reached while walking chains.
  bool DidPruneOps = false;

  // Seed the search with every operand; entry k of Worklist and OpWorkCount
  // belongs to Ops[k].
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Note: this local lambda shadows the member function AddToWorklist from
  // here to the end of the function; it feeds the local chain-walk Worklist,
  // not the combiner worklist.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.contains(Op)) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The walk is capped at 1024 worklist entries.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // For these nodes the walk continues through operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes are followed through their chain; anything else ends
      // this branch of the walk.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking up the
        // chain of another operand.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
2017
2018 /// MERGE_VALUES can always be eliminated.
2019 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2020   WorklistRemover DeadNodes(*this);
2021   // Replacing results may cause a different MERGE_VALUES to suddenly
2022   // be CSE'd with N, and carry its uses with it. Iterate until no
2023   // uses remain, to ensure that the node can be safely deleted.
2024   // First add the users of this node to the work list so that they
2025   // can be tried again once they have new operands.
2026   AddUsersToWorklist(N);
2027   do {
2028     // Do as a single replacement to avoid rewalking use lists.
2029     SmallVector<SDValue, 8> Ops;
2030     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2031       Ops.push_back(N->getOperand(i));
2032     DAG.ReplaceAllUsesWith(N, Ops.data());
2033   } while (!N->use_empty());
2034   deleteAndRecombine(N);
2035   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2036 }
2037
2038 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2039 /// ConstantSDNode pointer else nullptr.
2040 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2041   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2042   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2043 }
2044
2045 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2046 /// and that N may be folded in the load / store addressing mode.
2047 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2048                                     const TargetLowering &TLI) {
2049   EVT VT;
2050   unsigned AS;
2051
2052   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2053     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2054       return false;
2055     VT = LD->getMemoryVT();
2056     AS = LD->getAddressSpace();
2057   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2058     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2059       return false;
2060     VT = ST->getMemoryVT();
2061     AS = ST->getAddressSpace();
2062   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2063     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2064       return false;
2065     VT = LD->getMemoryVT();
2066     AS = LD->getAddressSpace();
2067   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2068     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2069       return false;
2070     VT = ST->getMemoryVT();
2071     AS = ST->getAddressSpace();
2072   } else
2073     return false;
2074
2075   TargetLowering::AddrMode AM;
2076   if (N->getOpcode() == ISD::ADD) {
2077     AM.HasBaseReg = true;
2078     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2079     if (Offset)
2080       // [reg +/- imm]
2081       AM.BaseOffs = Offset->getSExtValue();
2082     else
2083       // [reg +/- reg]
2084       AM.Scale = 1;
2085   } else if (N->getOpcode() == ISD::SUB) {
2086     AM.HasBaseReg = true;
2087     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2088     if (Offset)
2089       // [reg +/- imm]
2090       AM.BaseOffs = -Offset->getSExtValue();
2091     else
2092       // [reg +/- reg]
2093       AM.Scale = 1;
2094   } else
2095     return false;
2096
2097   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2098                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2099 }
2100
2101 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2102   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2103          "Unexpected binary operator");
2104
2105   // Don't do this unless the old select is going away. We want to eliminate the
2106   // binary operator, not replace a binop with a select.
2107   // TODO: Handle ISD::SELECT_CC.
2108   unsigned SelOpNo = 0;
2109   SDValue Sel = BO->getOperand(0);
2110   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2111     SelOpNo = 1;
2112     Sel = BO->getOperand(1);
2113   }
2114
2115   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2116     return SDValue();
2117
2118   SDValue CT = Sel.getOperand(1);
2119   if (!isConstantOrConstantVector(CT, true) &&
2120       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2121     return SDValue();
2122
2123   SDValue CF = Sel.getOperand(2);
2124   if (!isConstantOrConstantVector(CF, true) &&
2125       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2126     return SDValue();
2127
2128   // Bail out if any constants are opaque because we can't constant fold those.
2129   // The exception is "and" and "or" with either 0 or -1 in which case we can
2130   // propagate non constant operands into select. I.e.:
2131   // and (select Cond, 0, -1), X --> select Cond, 0, X
2132   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2133   auto BinOpcode = BO->getOpcode();
2134   bool CanFoldNonConst =
2135       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2136       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2137       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2138
2139   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2140   if (!CanFoldNonConst &&
2141       !isConstantOrConstantVector(CBO, true) &&
2142       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2143     return SDValue();
2144
2145   EVT VT = BO->getValueType(0);
2146
2147   // We have a select-of-constants followed by a binary operator with a
2148   // constant. Eliminate the binop by pulling the constant math into the select.
2149   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2150   SDLoc DL(Sel);
2151   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2152                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2153   if (!CanFoldNonConst && !NewCT.isUndef() &&
2154       !isConstantOrConstantVector(NewCT, true) &&
2155       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2156     return SDValue();
2157
2158   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2159                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2160   if (!CanFoldNonConst && !NewCF.isUndef() &&
2161       !isConstantOrConstantVector(NewCF, true) &&
2162       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2163     return SDValue();
2164
2165   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2166   SelectOp->setFlags(BO->getFlags());
2167   return SelectOp;
2168 }
2169
2170 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2171   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2172          "Expecting add or sub");
2173
2174   // Match a constant operand and a zext operand for the math instruction:
2175   // add Z, C
2176   // sub C, Z
2177   bool IsAdd = N->getOpcode() == ISD::ADD;
2178   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2179   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2180   auto *CN = dyn_cast<ConstantSDNode>(C);
2181   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2182     return SDValue();
2183
2184   // Match the zext operand as a setcc of a boolean.
2185   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2186       Z.getOperand(0).getValueType() != MVT::i1)
2187     return SDValue();
2188
2189   // Match the compare as: setcc (X & 1), 0, eq.
2190   SDValue SetCC = Z.getOperand(0);
2191   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2192   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2193       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2194       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2195     return SDValue();
2196
2197   // We are adding/subtracting a constant and an inverted low bit. Turn that
2198   // into a subtract/add of the low bit with incremented/decremented constant:
2199   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2200   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2201   EVT VT = C.getValueType();
2202   SDLoc DL(N);
2203   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2204   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2205                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2206   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2207 }
2208
2209 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2210 /// a shift and add with a different constant.
2211 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2212   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2213          "Expecting add or sub");
2214
2215   // We need a constant operand for the add/sub, and the other operand is a
2216   // logical shift right: add (srl), C or sub C, (srl).
2217   bool IsAdd = N->getOpcode() == ISD::ADD;
2218   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2219   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2220   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2221       ShiftOp.getOpcode() != ISD::SRL)
2222     return SDValue();
2223
2224   // The shift must be of a 'not' value.
2225   SDValue Not = ShiftOp.getOperand(0);
2226   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2227     return SDValue();
2228
2229   // The shift must be moving the sign bit to the least-significant-bit.
2230   EVT VT = ShiftOp.getValueType();
2231   SDValue ShAmt = ShiftOp.getOperand(1);
2232   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2233   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2234     return SDValue();
2235
2236   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2237   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2238   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2239   SDLoc DL(N);
2240   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2241   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2242   if (SDValue NewC =
2243           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2244                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2245     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2246   return SDValue();
2247 }
2248
2249 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2250 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2251 /// are no common bits set in the operands).
2252 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2253   SDValue N0 = N->getOperand(0);
2254   SDValue N1 = N->getOperand(1);
2255   EVT VT = N0.getValueType();
2256   SDLoc DL(N);
2257
2258   // fold vector ops
2259   if (VT.isVector()) {
2260     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2261       return FoldedVOp;
2262
2263     // fold (add x, 0) -> x, vector edition
2264     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2265       return N0;
2266     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2267       return N1;
2268   }
2269
2270   // fold (add x, undef) -> undef
2271   if (N0.isUndef())
2272     return N0;
2273
2274   if (N1.isUndef())
2275     return N1;
2276
2277   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2278     // canonicalize constant to RHS
2279     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2280       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2281     // fold (add c1, c2) -> c1+c2
2282     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2283   }
2284
2285   // fold (add x, 0) -> x
2286   if (isNullConstant(N1))
2287     return N0;
2288
2289   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2290     // fold ((A-c1)+c2) -> (A+(c2-c1))
2291     if (N0.getOpcode() == ISD::SUB &&
2292         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2293       SDValue Sub =
2294           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2295       assert(Sub && "Constant folding failed");
2296       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2297     }
2298
2299     // fold ((c1-A)+c2) -> (c1+c2)-A
2300     if (N0.getOpcode() == ISD::SUB &&
2301         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2302       SDValue Add =
2303           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2304       assert(Add && "Constant folding failed");
2305       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2306     }
2307
2308     // add (sext i1 X), 1 -> zext (not i1 X)
2309     // We don't transform this pattern:
2310     //   add (zext i1 X), -1 -> sext (not i1 X)
2311     // because most (?) targets generate better code for the zext form.
2312     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2313         isOneOrOneSplat(N1)) {
2314       SDValue X = N0.getOperand(0);
2315       if ((!LegalOperations ||
2316            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2317             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2318           X.getScalarValueSizeInBits() == 1) {
2319         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2320         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2321       }
2322     }
2323
2324     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2325     // equivalent to (add x, c0).
2326     if (N0.getOpcode() == ISD::OR &&
2327         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2328         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2329       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2330                                                     {N1, N0.getOperand(1)}))
2331         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2332     }
2333   }
2334
2335   if (SDValue NewSel = foldBinOpIntoSelect(N))
2336     return NewSel;
2337
2338   // reassociate add
2339   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2340     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2341       return RADD;
2342
2343     // Reassociate (add (or x, c), y) -> (add add(x, y), c)) if (or x, c) is
2344     // equivalent to (add x, c).
2345     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2346       if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
2347           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2348           DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2349         return DAG.getNode(ISD::ADD, DL, VT,
2350                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2351                            N0.getOperand(1));
2352       }
2353       return SDValue();
2354     };
2355     if (SDValue Add = ReassociateAddOr(N0, N1))
2356       return Add;
2357     if (SDValue Add = ReassociateAddOr(N1, N0))
2358       return Add;
2359   }
2360   // fold ((0-A) + B) -> B-A
2361   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2362     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2363
2364   // fold (A + (0-B)) -> A-B
2365   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2366     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2367
2368   // fold (A+(B-A)) -> B
2369   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2370     return N1.getOperand(0);
2371
2372   // fold ((B-A)+A) -> B
2373   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2374     return N0.getOperand(0);
2375
2376   // fold ((A-B)+(C-A)) -> (C-B)
2377   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2378       N0.getOperand(0) == N1.getOperand(1))
2379     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380                        N0.getOperand(1));
2381
2382   // fold ((A-B)+(B-C)) -> (A-C)
2383   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2384       N0.getOperand(1) == N1.getOperand(0))
2385     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2386                        N1.getOperand(1));
2387
2388   // fold (A+(B-(A+C))) to (B-C)
2389   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2390       N0 == N1.getOperand(1).getOperand(0))
2391     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2392                        N1.getOperand(1).getOperand(1));
2393
2394   // fold (A+(B-(C+A))) to (B-C)
2395   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2396       N0 == N1.getOperand(1).getOperand(1))
2397     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2398                        N1.getOperand(1).getOperand(0));
2399
2400   // fold (A+((B-A)+or-C)) to (B+or-C)
2401   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2402       N1.getOperand(0).getOpcode() == ISD::SUB &&
2403       N0 == N1.getOperand(0).getOperand(1))
2404     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2405                        N1.getOperand(1));
2406
2407   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2408   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2409     SDValue N00 = N0.getOperand(0);
2410     SDValue N01 = N0.getOperand(1);
2411     SDValue N10 = N1.getOperand(0);
2412     SDValue N11 = N1.getOperand(1);
2413
2414     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2415       return DAG.getNode(ISD::SUB, DL, VT,
2416                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2417                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2418   }
2419
2420   // fold (add (umax X, C), -C) --> (usubsat X, C)
2421   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2422     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2423       return (!Max && !Op) ||
2424              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2425     };
2426     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2427                                   /*AllowUndefs*/ true))
2428       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2429                          N0.getOperand(1));
2430   }
2431
2432   if (SimplifyDemandedBits(SDValue(N, 0)))
2433     return SDValue(N, 0);
2434
2435   if (isOneOrOneSplat(N1)) {
2436     // fold (add (xor a, -1), 1) -> (sub 0, a)
2437     if (isBitwiseNot(N0))
2438       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2439                          N0.getOperand(0));
2440
2441     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2442     if (N0.getOpcode() == ISD::ADD) {
2443       SDValue A, Xor;
2444
2445       if (isBitwiseNot(N0.getOperand(0))) {
2446         A = N0.getOperand(1);
2447         Xor = N0.getOperand(0);
2448       } else if (isBitwiseNot(N0.getOperand(1))) {
2449         A = N0.getOperand(0);
2450         Xor = N0.getOperand(1);
2451       }
2452
2453       if (Xor)
2454         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2455     }
2456
2457     // Look for:
2458     //   add (add x, y), 1
2459     // And if the target does not like this form then turn into:
2460     //   sub y, (xor x, -1)
2461     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2462         N0.getOpcode() == ISD::ADD) {
2463       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2464                                 DAG.getAllOnesConstant(DL, VT));
2465       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2466     }
2467   }
2468
2469   // (x - y) + -1  ->  add (xor y, -1), x
2470   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2471       isAllOnesOrAllOnesSplat(N1)) {
2472     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2473     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2474   }
2475
2476   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2477     return Combined;
2478
2479   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2480     return Combined;
2481
2482   return SDValue();
2483 }
2484
2485 SDValue DAGCombiner::visitADD(SDNode *N) {
2486   SDValue N0 = N->getOperand(0);
2487   SDValue N1 = N->getOperand(1);
2488   EVT VT = N0.getValueType();
2489   SDLoc DL(N);
2490
2491   if (SDValue Combined = visitADDLike(N))
2492     return Combined;
2493
2494   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2495     return V;
2496
2497   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2498     return V;
2499
2500   // fold (a+b) -> (a|b) iff a and b share no bits.
2501   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2502       DAG.haveNoCommonBitsSet(N0, N1))
2503     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2504
2505   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2506   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2507     const APInt &C0 = N0->getConstantOperandAPInt(0);
2508     const APInt &C1 = N1->getConstantOperandAPInt(0);
2509     return DAG.getVScale(DL, VT, C0 + C1);
2510   }
2511
2512   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2513   if ((N0.getOpcode() == ISD::ADD) &&
2514       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2515       (N1.getOpcode() == ISD::VSCALE)) {
2516     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2517     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2518     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2519     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2520   }
2521
2522   // Fold (add step_vector(c1), step_vector(c2)  to step_vector(c1+c2))
2523   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2524       N1.getOpcode() == ISD::STEP_VECTOR) {
2525     const APInt &C0 = N0->getConstantOperandAPInt(0);
2526     const APInt &C1 = N1->getConstantOperandAPInt(0);
2527     APInt NewStep = C0 + C1;
2528     return DAG.getStepVector(DL, VT, NewStep);
2529   }
2530
2531   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2532   if ((N0.getOpcode() == ISD::ADD) &&
2533       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2534       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2535     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2536     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2537     APInt NewStep = SV0 + SV1;
2538     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2539     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2540   }
2541
2542   return SDValue();
2543 }
2544
2545 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2546   unsigned Opcode = N->getOpcode();
2547   SDValue N0 = N->getOperand(0);
2548   SDValue N1 = N->getOperand(1);
2549   EVT VT = N0.getValueType();
2550   SDLoc DL(N);
2551
2552   // fold vector ops
2553   if (VT.isVector()) {
2554     // TODO SimplifyVBinOp
2555
2556     // fold (add_sat x, 0) -> x, vector edition
2557     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2558       return N0;
2559     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2560       return N1;
2561   }
2562
2563   // fold (add_sat x, undef) -> -1
2564   if (N0.isUndef() || N1.isUndef())
2565     return DAG.getAllOnesConstant(DL, VT);
2566
2567   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2568     // canonicalize constant to RHS
2569     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2570       return DAG.getNode(Opcode, DL, VT, N1, N0);
2571     // fold (add_sat c1, c2) -> c3
2572     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2573   }
2574
2575   // fold (add_sat x, 0) -> x
2576   if (isNullConstant(N1))
2577     return N0;
2578
2579   // If it cannot overflow, transform into an add.
2580   if (Opcode == ISD::UADDSAT)
2581     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2582       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2583
2584   return SDValue();
2585 }
2586
2587 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2588   bool Masked = false;
2589
2590   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2591   while (true) {
2592     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2593       V = V.getOperand(0);
2594       continue;
2595     }
2596
2597     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2598       Masked = true;
2599       V = V.getOperand(0);
2600       continue;
2601     }
2602
2603     break;
2604   }
2605
2606   // If this is not a carry, return.
2607   if (V.getResNo() != 1)
2608     return SDValue();
2609
2610   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2611       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2612     return SDValue();
2613
2614   EVT VT = V.getNode()->getValueType(0);
2615   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2616     return SDValue();
2617
2618   // If the result is masked, then no matter what kind of bool it is we can
2619   // return. If it isn't, then we need to make sure the bool type is either 0 or
2620   // 1 and not other values.
2621   if (Masked ||
2622       TLI.getBooleanContents(V.getValueType()) ==
2623           TargetLoweringBase::ZeroOrOneBooleanContent)
2624     return V;
2625
2626   return SDValue();
2627 }
2628
2629 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2630 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2631 /// the opcode and bypass the mask operation.
2632 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2633                                  SelectionDAG &DAG, const SDLoc &DL) {
2634   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2635     return SDValue();
2636
2637   EVT VT = N0.getValueType();
2638   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2639     return SDValue();
2640
2641   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2642   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2643   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2644 }
2645
2646 /// Helper for doing combines based on N0 and N1 being added to each other.
2647 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2648                                           SDNode *LocReference) {
2649   EVT VT = N0.getValueType();
2650   SDLoc DL(LocReference);
2651
2652   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2653   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2654       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2655     return DAG.getNode(ISD::SUB, DL, VT, N0,
2656                        DAG.getNode(ISD::SHL, DL, VT,
2657                                    N1.getOperand(0).getOperand(1),
2658                                    N1.getOperand(1)));
2659
2660   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2661     return V;
2662
2663   // Look for:
2664   //   add (add x, 1), y
2665   // And if the target does not like this form then turn into:
2666   //   sub y, (xor x, -1)
2667   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2668       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2669     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2670                               DAG.getAllOnesConstant(DL, VT));
2671     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2672   }
2673
2674   // Hoist one-use subtraction by non-opaque constant:
2675   //   (x - C) + y  ->  (x + y) - C
2676   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2677   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2678       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2679     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2680     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2681   }
2682   // Hoist one-use subtraction from non-opaque constant:
2683   //   (C - x) + y  ->  (y - x) + C
2684   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2685       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2686     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2687     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2688   }
2689
2690   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2691   // rather than 'add 0/-1' (the zext should get folded).
2692   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2693   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2694       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2695       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2696     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2697     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2698   }
2699
2700   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2701   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2702     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2703     if (TN->getVT() == MVT::i1) {
2704       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2705                                  DAG.getConstant(1, DL, VT));
2706       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2707     }
2708   }
2709
2710   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2711   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2712       N1.getResNo() == 0)
2713     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2714                        N0, N1.getOperand(0), N1.getOperand(2));
2715
2716   // (add X, Carry) -> (addcarry X, 0, Carry)
2717   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2718     if (SDValue Carry = getAsCarry(TLI, N1))
2719       return DAG.getNode(ISD::ADDCARRY, DL,
2720                          DAG.getVTList(VT, Carry.getValueType()), N0,
2721                          DAG.getConstant(0, DL, VT), Carry);
2722
2723   return SDValue();
2724 }
2725
2726 SDValue DAGCombiner::visitADDC(SDNode *N) {
2727   SDValue N0 = N->getOperand(0);
2728   SDValue N1 = N->getOperand(1);
2729   EVT VT = N0.getValueType();
2730   SDLoc DL(N);
2731
2732   // If the flag result is dead, turn this into an ADD.
2733   if (!N->hasAnyUseOfValue(1))
2734     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2735                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2736
2737   // canonicalize constant to RHS.
2738   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2739   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2740   if (N0C && !N1C)
2741     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2742
2743   // fold (addc x, 0) -> x + no carry out
2744   if (isNullConstant(N1))
2745     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2746                                         DL, MVT::Glue));
2747
2748   // If it cannot overflow, transform into an add.
2749   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2750     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2751                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2752
2753   return SDValue();
2754 }
2755
2756 /**
2757  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2758  * then the flip also occurs if computing the inverse is the same cost.
2759  * This function returns an empty SDValue in case it cannot flip the boolean
2760  * without increasing the cost of the computation. If you want to flip a boolean
2761  * no matter what, use DAG.getLogicalNOT.
2762  */
2763 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2764                                   const TargetLowering &TLI,
2765                                   bool Force) {
2766   if (Force && isa<ConstantSDNode>(V))
2767     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768
2769   if (V.getOpcode() != ISD::XOR)
2770     return SDValue();
2771
2772   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2773   if (!Const)
2774     return SDValue();
2775
2776   EVT VT = V.getValueType();
2777
2778   bool IsFlip = false;
2779   switch(TLI.getBooleanContents(VT)) {
2780     case TargetLowering::ZeroOrOneBooleanContent:
2781       IsFlip = Const->isOne();
2782       break;
2783     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2784       IsFlip = Const->isAllOnesValue();
2785       break;
2786     case TargetLowering::UndefinedBooleanContent:
2787       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2788       break;
2789   }
2790
2791   if (IsFlip)
2792     return V.getOperand(0);
2793   if (Force)
2794     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2795   return SDValue();
2796 }
2797
2798 SDValue DAGCombiner::visitADDO(SDNode *N) {
2799   SDValue N0 = N->getOperand(0);
2800   SDValue N1 = N->getOperand(1);
2801   EVT VT = N0.getValueType();
2802   bool IsSigned = (ISD::SADDO == N->getOpcode());
2803
2804   EVT CarryVT = N->getValueType(1);
2805   SDLoc DL(N);
2806
2807   // If the flag result is dead, turn this into an ADD.
2808   if (!N->hasAnyUseOfValue(1))
2809     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2810                      DAG.getUNDEF(CarryVT));
2811
2812   // canonicalize constant to RHS.
2813   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2814       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2815     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2816
2817   // fold (addo x, 0) -> x + no carry out
2818   if (isNullOrNullSplat(N1))
2819     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2820
2821   if (!IsSigned) {
2822     // If it cannot overflow, transform into an add.
2823     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2824       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2825                        DAG.getConstant(0, DL, CarryVT));
2826
2827     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2828     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2829       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2830                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2831       return CombineTo(
2832           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2833     }
2834
2835     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2836       return Combined;
2837
2838     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2839       return Combined;
2840   }
2841
2842   return SDValue();
2843 }
2844
2845 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2846   EVT VT = N0.getValueType();
2847   if (VT.isVector())
2848     return SDValue();
2849
2850   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2851   // If Y + 1 cannot overflow.
2852   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2853     SDValue Y = N1.getOperand(0);
2854     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2855     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2856       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2857                          N1.getOperand(2));
2858   }
2859
2860   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2861   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2862     if (SDValue Carry = getAsCarry(TLI, N1))
2863       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2864                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2865
2866   return SDValue();
2867 }
2868
2869 SDValue DAGCombiner::visitADDE(SDNode *N) {
2870   SDValue N0 = N->getOperand(0);
2871   SDValue N1 = N->getOperand(1);
2872   SDValue CarryIn = N->getOperand(2);
2873
2874   // canonicalize constant to RHS
2875   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877   if (N0C && !N1C)
2878     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2879                        N1, N0, CarryIn);
2880
2881   // fold (adde x, y, false) -> (addc x, y)
2882   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2883     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2884
2885   return SDValue();
2886 }
2887
2888 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2889   SDValue N0 = N->getOperand(0);
2890   SDValue N1 = N->getOperand(1);
2891   SDValue CarryIn = N->getOperand(2);
2892   SDLoc DL(N);
2893
2894   // canonicalize constant to RHS
2895   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2896   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2897   if (N0C && !N1C)
2898     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2899
2900   // fold (addcarry x, y, false) -> (uaddo x, y)
2901   if (isNullConstant(CarryIn)) {
2902     if (!LegalOperations ||
2903         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2904       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2905   }
2906
2907   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2908   if (isNullConstant(N0) && isNullConstant(N1)) {
2909     EVT VT = N0.getValueType();
2910     EVT CarryVT = CarryIn.getValueType();
2911     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2912     AddToWorklist(CarryExt.getNode());
2913     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2914                                     DAG.getConstant(1, DL, VT)),
2915                      DAG.getConstant(0, DL, CarryVT));
2916   }
2917
2918   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2919     return Combined;
2920
2921   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2922     return Combined;
2923
2924   return SDValue();
2925 }
2926
2927 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2928   SDValue N0 = N->getOperand(0);
2929   SDValue N1 = N->getOperand(1);
2930   SDValue CarryIn = N->getOperand(2);
2931   SDLoc DL(N);
2932
2933   // canonicalize constant to RHS
2934   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2935   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2936   if (N0C && !N1C)
2937     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2938
2939   // fold (saddo_carry x, y, false) -> (saddo x, y)
2940   if (isNullConstant(CarryIn)) {
2941     if (!LegalOperations ||
2942         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2943       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2944   }
2945
2946   return SDValue();
2947 }
2948
2949 /**
 * If we are facing some sort of diamond carry propagation pattern try to
2951  * break it up to generate something like:
2952  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2953  *
2954  * The end result is usually an increase in operation required, but because the
 * carry is now linearized, other transforms can kick in and optimize the DAG.
2956  *
2957  * Patterns typically look something like
2958  *            (uaddo A, B)
2959  *             /       \
2960  *          Carry      Sum
2961  *            |          \
2962  *            | (addcarry *, 0, Z)
2963  *            |       /
2964  *             \   Carry
2965  *              |   /
2966  * (addcarry X, *, *)
2967  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2969  * produce a combine with a single path for carry propagation.
2970  */
2971 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2972                                       SDValue X, SDValue Carry0, SDValue Carry1,
2973                                       SDNode *N) {
2974   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2975     return SDValue();
2976   if (Carry1.getOpcode() != ISD::UADDO)
2977     return SDValue();
2978
2979   SDValue Z;
2980
2981   /**
2982    * First look for a suitable Z. It will present itself in the form of
2983    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2984    */
2985   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2986       isNullConstant(Carry0.getOperand(1))) {
2987     Z = Carry0.getOperand(2);
2988   } else if (Carry0.getOpcode() == ISD::UADDO &&
2989              isOneConstant(Carry0.getOperand(1))) {
2990     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2991     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2992   } else {
2993     // We couldn't find a suitable Z.
2994     return SDValue();
2995   }
2996
2997
2998   auto cancelDiamond = [&](SDValue A,SDValue B) {
2999     SDLoc DL(N);
3000     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3001     Combiner.AddToWorklist(NewY.getNode());
3002     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3003                        DAG.getConstant(0, DL, X.getValueType()),
3004                        NewY.getValue(1));
3005   };
3006
3007   /**
3008    *      (uaddo A, B)
3009    *           |
3010    *          Sum
3011    *           |
3012    * (addcarry *, 0, Z)
3013    */
3014   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3015     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3016   }
3017
3018   /**
3019    * (addcarry A, 0, Z)
3020    *         |
3021    *        Sum
3022    *         |
3023    *  (uaddo *, B)
3024    */
3025   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3026     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3027   }
3028
3029   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3030     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3031   }
3032
3033   return SDValue();
3034 }
3035
3036 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3037 // match patterns like:
3038 //
3039 //          (uaddo A, B)            CarryIn
3040 //            |  \                     |
3041 //            |   \                    |
3042 //    PartialSum   PartialCarryOutX   /
3043 //            |        |             /
3044 //            |    ____|____________/
3045 //            |   /    |
3046 //     (uaddo *, *)    \________
3047 //       |  \                   \
3048 //       |   \                   |
3049 //       |    PartialCarryOutY   |
3050 //       |        \              |
3051 //       |         \            /
3052 //   AddCarrySum    |    ______/
3053 //                  |   /
3054 //   CarryOut = (or *, *)
3055 //
3056 // And generate ADDCARRY (or SUBCARRY) with two result values:
3057 //
3058 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3059 //
3060 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3061 // a single path for carry/borrow out propagation.
     //
     // Carry0 and Carry1 are the two partial carries being merged by N; they may
     // be passed in either order. On success every use of the second
     // UADDO/USUBO's sum is redirected to the merged node's sum, and the value
     // returned is the merged carry-out, or a constant i1 zero when N is an AND.
     // An empty SDValue is returned when the pattern does not match.
3062 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3063                                    const TargetLowering &TLI, SDValue Carry0,
3064                                    SDValue Carry1, SDNode *N) {
       // Both values must be result number 1 of nodes with the same opcode, and
       // that opcode must be UADDO or USUBO.
3065   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3066     return SDValue();
3067   unsigned Opcode = Carry0.getOpcode();
3068   if (Opcode != Carry1.getOpcode())
3069     return SDValue();
3070   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3071     return SDValue();
3072
3073   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3074   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3075   // the above ASCII art.)
       // If the sum of Carry0's node does not feed Carry1's node, try the other
       // order; if it still does not, the two nodes are not chained and we bail.
3076   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3077       Carry1.getOperand(1) != Carry0.getValue(0))
3078     std::swap(Carry0, Carry1);
3079   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3080       Carry1.getOperand(1) != Carry0.getValue(0))
3081     return SDValue();
3082
3083   // The carry in value must be on the righthand side for subtraction.
       // CarryInOperandNum is 1 when the partial sum is operand 0 of Carry1's
       // node, and 0 otherwise.
3084   unsigned CarryInOperandNum =
3085       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3086   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3087     return SDValue();
3088   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3089
       // Only proceed if the target reports the merged opcode as legal or custom
       // for the type of the partial sum.
3090   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3091   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3092     return SDValue();
3093
3094   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3095   // TODO: make getAsCarry() aware of how partial carries are merged.
       // Accepted form: (zero_extend i1). The i1 source of the extension is what
       // gets used as the carry/borrow-in operand of the merged node.
3096   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3097     return SDValue();
3098   CarryIn = CarryIn.getOperand(0);
3099   if (CarryIn.getValueType() != MVT::i1)
3100     return SDValue();
3101
       // The merged node takes A and B from Carry0's node and reuses the
       // value-type list of Carry1's node.
3102   SDLoc DL(N);
3103   SDValue Merged =
3104       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3105                   Carry0.getOperand(1), CarryIn);
3106
3107   // Please note that because we have proven that the result of the UADDO/USUBO
3108   // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
3109   // therefore prove that if the first UADDO/USUBO overflows, the second
3110   // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
3111   // maximum value.
3112   //
3113   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3114   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3115   //
3116   // This is important because it means that OR and XOR can be used to merge
3117   // carry flags; and that AND can return a constant zero.
3118   //
3119   // TODO: match other operations that can merge flags (ADD, etc)
3120   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3121   if (N->getOpcode() == ISD::AND)
3122     return DAG.getConstant(0, DL, MVT::i1);
3123   return Merged.getValue(1);
3124 }
3125
3126 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3127                                        SDNode *N) {
       // Folds for an addcarry-style node N with operands (N0, N1, CarryIn).
       // The operand order matters: only N0 is inspected for the NOT / ADD /
       // UADDO shapes and only N1 for the carry shape. Returns the replacement
       // value, or an empty SDValue when nothing matches.
3128   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
       // CombineTo is handed both replacement values: the SUBCARRY sum and the
       // logical NOT of the SUBCARRY's second result.
3129   if (isBitwiseNot(N0))
3130     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3131       SDLoc DL(N);
3132       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3133                                 N0.getOperand(0), NotC);
3134       return CombineTo(
3135           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3136     }
3137
3138   // Iff the flag result is dead:
3139   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3140   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3141   // or the dependency between the instructions.
3142   if ((N0.getOpcode() == ISD::ADD ||
3143        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3144         N0.getValue(1) != CarryIn)) &&
3145       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3146     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3147                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3148
3149   /**
3150    * When one of the addcarry argument is itself a carry, we may be facing
3151    * a diamond carry propagation. In which case we try to transform the DAG
3152    * to ensure linear carry propagation if that is possible.
3153    */
3154   if (auto Y = getAsCarry(TLI, N1)) {
3155     // Because both are carries, Y and Z can be swapped.
         // Hence the diamond matcher is tried with (Y, CarryIn) and then with
         // (CarryIn, Y) in its Carry0/Carry1 roles.
3156     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3157       return R;
3158     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3159       return R;
3160   }
3161
3162   return SDValue();
3163 }
3164
3165 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3166 // clamp/truncation if necessary.
     //
     // LHS and RHS are values of type SrcVT, whose scalar width must be at least
     // that of DstVT (asserted). Returns the USUBSAT node of type DstVT, or an
     // empty SDValue when the types differ and the bits of LHS above DstVT's
     // scalar width are not reported as zero by MaskedValueIsZero.
3167 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3168                                    SDValue RHS, SelectionDAG &DAG,
3169                                    const SDLoc &DL) {
3170   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3171          "Illegal truncation");
3172
       // Same type: no clamp or truncation is needed.
3173   if (DstVT == SrcVT)
3174     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3175
3176   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3177   // clamping RHS.
       // UpperBits selects the SrcVT bits from DstVT's scalar width upwards.
3178   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3179                                           DstVT.getScalarSizeInBits());
3180   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3181     return SDValue();
3182
       // SatLimit has the low DstVT-width bits set, expressed in SrcVT. RHS is
       // clamped to it with UMIN before both operands are truncated to DstVT.
3183   SDValue SatLimit =
3184       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3185                                            DstVT.getScalarSizeInBits()),
3186                       DL, SrcVT);
3187   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3188   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3189   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3190   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3191 }
3192
3193 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3194 // usubsat(a,b), optionally as a truncated type.
     //
     // N must be a SUB node; DstVT is the type of the USUBSAT to create. Returns
     // whatever getTruncatedUSUBSAT produces for the first matching pattern
     // (which may itself be empty), or an empty SDValue when N is not a SUB,
     // when USUBSAT is unavailable for DstVT with LegalOperations set, or when
     // no pattern matches.
3195 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3196   if (N->getOpcode() != ISD::SUB ||
3197       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3198     return SDValue();
3199
3200   EVT SubVT = N->getValueType(0);
3201   SDValue Op0 = N->getOperand(0);
3202   SDValue Op1 = N->getOperand(1);
3203
3204   // Try to find umax(a,b) - b or a - umin(a,b) patterns
3205   // they may be converted to usubsat(a,b).
       // umax form: whichever UMAX operand is not Op1 becomes the USUBSAT LHS.
3206   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3207     SDValue MaxLHS = Op0.getOperand(0);
3208     SDValue MaxRHS = Op0.getOperand(1);
3209     if (MaxLHS == Op1)
3210       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3211     if (MaxRHS == Op1)
3212       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3213   }
3214
       // umin form: whichever UMIN operand is not Op0 becomes the USUBSAT RHS.
3215   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3216     SDValue MinLHS = Op1.getOperand(0);
3217     SDValue MinRHS = Op1.getOperand(1);
3218     if (MinLHS == Op0)
3219       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3220     if (MinRHS == Op0)
3221       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3222   }
3223
3224   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
       // Here the zero-extended operand (not Op0 itself) is passed as LHS and
       // the UMIN's type as the source type, so the helper does the truncation.
       // NOTE(review): both branches pass MinLHS.getValueType() as SrcVT;
       // presumably both UMIN operands always share one type - confirm.
3225   if (Op1.getOpcode() == ISD::TRUNCATE &&
3226       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3227       Op1.getOperand(0).hasOneUse()) {
3228     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3229     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3230     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3231       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3232                                  DAG, SDLoc(N));
3233     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3234       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3235                                  DAG, SDLoc(N));
3236   }
3237
3238   return SDValue();
3239 }
3240
3241 // Since it may not be valid to emit a fold to zero for vector initializers
3242 // check if we can before folding.
     //
     // Returns a zero constant of type VT. For a vector VT the constant is only
     // produced when LegalOperations is false or BUILD_VECTOR is legal for VT;
     // otherwise an empty SDValue is returned and the caller must not fold.
3243 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3244                              SelectionDAG &DAG, bool LegalOperations) {
       // Non-vector types can always take the zero constant.
3245   if (!VT.isVector())
3246     return DAG.getConstant(0, DL, VT);
3247   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3248     return DAG.getConstant(0, DL, VT);
3249   return SDValue();
3250 }
3251
3252 SDValue DAGCombiner::visitSUB(SDNode *N) {
       // Combine a SUB node N = (sub N0, N1). The folds below are tried in the
       // order written and the first one that matches returns its replacement
       // value; an empty SDValue is returned when none applies. VT is taken
       // from N0 and is used as the type of every node created here.
3253   SDValue N0 = N->getOperand(0);
3254   SDValue N1 = N->getOperand(1);
3255   EVT VT = N0.getValueType();
3256   SDLoc DL(N);
3257
3258   // fold vector ops
3259   if (VT.isVector()) {
3260     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3261       return FoldedVOp;
3262
3263     // fold (sub x, 0) -> x, vector edition
3264     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3265       return N0;
3266   }
3267
3268   // fold (sub x, x) -> 0
3269   // FIXME: Refactor this and xor and other similar operations together.
       // tryFoldToZero may return an empty SDValue, in which case this visit
       // ends here without a fold.
3270   if (N0 == N1)
3271     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3272
3273   // fold (sub c1, c2) -> c3
3274   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3275     return C;
3276
3277   if (SDValue NewSel = foldBinOpIntoSelect(N))
3278     return NewSel;
3279
3280   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3281
3282   // fold (sub x, c) -> (add x, -c)
       // This returns unconditionally, so N1C is null for everything below.
3283   if (N1C) {
3284     return DAG.getNode(ISD::ADD, DL, VT, N0,
3285                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3286   }
3287
       // Folds for negation: N0 is zero (or a splat of zero), i.e. (sub 0, N1).
3288   if (isNullOrNullSplat(N0)) {
3289     unsigned BitWidth = VT.getScalarSizeInBits();
3290     // Right-shifting everything out but the sign bit followed by negation is
3291     // the same as flipping arithmetic/logical shift type without the negation:
3292     // -(X >>u 31) -> (X >>s 31)
3293     // -(X >>s 31) -> (X >>u 31)
3294     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3295       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3296       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3297         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3298         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3299           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3300       }
3301     }
3302
3303     // 0 - X --> 0 if the sub is NUW.
         // N0 is the zero operand here, so returning it yields the zero result.
3304     if (N->getFlags().hasNoUnsignedWrap())
3305       return N0;
3306
3307     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3308       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3309       // N1 must be 0 because negating the minimum signed value is undefined.
3310       if (N->getFlags().hasNoSignedWrap())
3311         return N0;
3312
3313       // 0 - X --> X if X is 0 or the minimum signed value.
3314       return N1;
3315     }
3316
3317     // Convert 0 - abs(x).
         // Only attempted when ABS is neither legal nor custom for VT; the
         // expansion is delegated to TLI.expandABS, which fills in Result.
3318     SDValue Result;
3319     if (N1->getOpcode() == ISD::ABS &&
3320         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3321         TLI.expandABS(N1.getNode(), Result, DAG, true))
3322       return Result;
3323
3324     // Fold neg(splat(neg(x)) -> splat(x)
3325     if (VT.isVector()) {
3326       SDValue N1S = DAG.getSplatValue(N1, true);
3327       if (N1S && N1S.getOpcode() == ISD::SUB &&
3328           isNullConstant(N1S.getOperand(0))) {
3329         if (VT.isScalableVector())
3330           return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3331         return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3332       }
3333     }
3334   }
3335
3336   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
       // N0 is the all-ones value, so it is reused as the XOR mask.
3337   if (isAllOnesOrAllOnesSplat(N0))
3338     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3339
3340   // fold (A - (0-B)) -> A+B
3341   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3342     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3343
3344   // fold A-(A-B) -> B
3345   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3346     return N1.getOperand(1);
3347
3348   // fold (A+B)-A -> B
3349   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3350     return N0.getOperand(1);
3351
3352   // fold (A+B)-B -> A
3353   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3354     return N0.getOperand(0);
3355
3356   // fold (A+C1)-C2 -> A+(C1-C2)
3357   if (N0.getOpcode() == ISD::ADD &&
3358       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3359       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3360     SDValue NewC =
3361         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3362     assert(NewC && "Constant folding failed");
3363     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3364   }
3365
3366   // fold C2-(A+C1) -> (C2-C1)-A
3367   if (N1.getOpcode() == ISD::ADD) {
3368     SDValue N11 = N1.getOperand(1);
3369     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3370         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3371       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3372       assert(NewC && "Constant folding failed");
3373       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3374     }
3375   }
3376
3377   // fold (A-C1)-C2 -> A-(C1+C2)
3378   if (N0.getOpcode() == ISD::SUB &&
3379       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3380       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3381     SDValue NewC =
3382         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3383     assert(NewC && "Constant folding failed");
3384     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3385   }
3386
3387   // fold (c1-A)-c2 -> (c1-c2)-A
3388   if (N0.getOpcode() == ISD::SUB &&
3389       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3390       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3391     SDValue NewC =
3392         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3393     assert(NewC && "Constant folding failed");
3394     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3395   }
3396
3397   // fold ((A+(B+or-C))-B) -> A+or-C
       // The inner node's own opcode (ADD or SUB) is reused for the result.
3398   if (N0.getOpcode() == ISD::ADD &&
3399       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3400        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3401       N0.getOperand(1).getOperand(0) == N1)
3402     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3403                        N0.getOperand(1).getOperand(1));
3404
3405   // fold ((A+(C+B))-B) -> A+C
3406   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3407       N0.getOperand(1).getOperand(1) == N1)
3408     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3409                        N0.getOperand(1).getOperand(0));
3410
3411   // fold ((A-(B-C))-C) -> A-B
3412   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3413       N0.getOperand(1).getOperand(1) == N1)
3414     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3415                        N0.getOperand(1).getOperand(0));
3416
3417   // fold (A-(B-C)) -> A+(C-B)
3418   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3419     return DAG.getNode(ISD::ADD, DL, VT, N0,
3420                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3421                                    N1.getOperand(0)));
3422
3423   // A - (A & B)  ->  A & (~B)
       // The AND operands are swapped if needed so that A names the one equal
       // to N0. The fold is taken when the AND has a single use or B is a
       // non-opaque constant (or constant vector).
3424   if (N1.getOpcode() == ISD::AND) {
3425     SDValue A = N1.getOperand(0);
3426     SDValue B = N1.getOperand(1);
3427     if (A != N0)
3428       std::swap(A, B);
3429     if (A == N0 &&
3430         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3431       SDValue InvB =
3432           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3433       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3434     }
3435   }
3436
3437   // fold (X - (-Y * Z)) -> (X + (Y * Z))
       // The negation (sub 0, Y) may appear as either operand of the MUL.
3438   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3439     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3440         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3441       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3442                                 N1.getOperand(0).getOperand(1),
3443                                 N1.getOperand(1));
3444       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3445     }
3446     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3447         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3448       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3449                                 N1.getOperand(0),
3450                                 N1.getOperand(1).getOperand(1));
3451       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3452     }
3453   }
3454
3455   // If either operand of a sub is undef, the result is undef
3456   if (N0.isUndef())
3457     return N0;
3458   if (N1.isUndef())
3459     return N1;
3460
3461   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3462     return V;
3463
3464   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3465     return V;
3466
3467   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3468     return V;
3469
3470   if (SDValue V = foldSubToUSubSat(VT, N))
3471     return V;
3472
3473   // (x - y) - 1  ->  add (xor y, -1), x
3474   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3475     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3476                               DAG.getAllOnesConstant(DL, VT));
3477     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3478   }
3479
3480   // Look for:
3481   //   sub y, (xor x, -1)
3482   // And if the target does not like this form then turn into:
3483   //   add (add x, y), 1
3484   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3485     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3486     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3487   }
3488
3489   // Hoist one-use addition by non-opaque constant:
3490   //   (x + C) - y  ->  (x - y) + C
3491   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3492       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3493     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3494     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3495   }
3496   // y - (x + C)  ->  (y - x) - C
3497   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3498       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3499     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3500     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3501   }
3502   // (x - C) - y  ->  (x - y) - C
3503   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3504   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3505       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3506     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3507     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3508   }
3509   // (C - x) - y  ->  C - (x + y)
3510   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3511       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3512     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3513     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3514   }
3515
3516   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3517   // rather than 'sub 0/1' (the sext should get folded).
3518   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3519   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3520       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3521       TLI.getBooleanContents(VT) ==
3522           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3523     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3524     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3525   }
3526
3527   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
       // The XOR operands are accepted in either order, and the shift amount
       // must be a constant (or constant splat) equal to the scalar width - 1.
3528   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3529     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3530       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3531       SDValue S0 = N1.getOperand(0);
3532       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3533         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3534           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3535             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3536     }
3537   }
3538
3539   // If the relocation model supports it, consider symbol offsets.
3540   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3541     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3542       // fold (sub Sym, c) -> Sym-c
           // NOTE(review): a non-null N1C already returned at the
           // "(sub x, c) -> (add x, -c)" fold above, so this branch looks
           // unreachable from here - confirm before relying on it.
3543       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3544         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3545                                     GA->getOffset() -
3546                                         (uint64_t)N1C->getSExtValue());
3547       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3548       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3549         if (GA->getGlobal() == GB->getGlobal())
3550           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3551                                  DL, VT);
3552     }
3553
3554   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3555   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3556     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3557     if (TN->getVT() == MVT::i1) {
3558       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3559                                  DAG.getConstant(1, DL, VT));
3560       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3561     }
3562   }
3563
3564   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3565   if (N1.getOpcode() == ISD::VSCALE) {
3566     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3567     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3568   }
3569
3570   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3571   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3572     APInt NewStep = -N1.getConstantOperandAPInt(0);
3573     return DAG.getNode(ISD::ADD, DL, VT, N0,
3574                        DAG.getStepVector(DL, VT, NewStep));
3575   }
3576
3577   // Prefer an add for more folding potential and possibly better codegen:
3578   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3579   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3580     SDValue ShAmt = N1.getOperand(1);
3581     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3582     if (ShAmtC &&
3583         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3584       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3585       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3586     }
3587   }
3588
       // Only form ADDCARRY when the target reports it legal or custom for VT.
3589   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3590     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3591     if (SDValue Carry = getAsCarry(TLI, N0)) {
3592       SDValue X = N1;
3593       SDValue Zero = DAG.getConstant(0, DL, VT);
3594       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3595       return DAG.getNode(ISD::ADDCARRY, DL,
3596                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3597                          Carry);
3598     }
3599   }
3600
3601   return SDValue();
3602 }
3603
3604 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
       // Combine a saturating-subtract node N = (sub_sat N0, N1). N's own
       // opcode is forwarded to constant folding, so this routine does not
       // depend on which sub_sat opcode N carries. Returns the replacement
       // value, or an empty SDValue when no fold applies.
3605   SDValue N0 = N->getOperand(0);
3606   SDValue N1 = N->getOperand(1);
3607   EVT VT = N0.getValueType();
3608   SDLoc DL(N);
3609
3610   // fold vector ops
3611   if (VT.isVector()) {
3612     // TODO SimplifyVBinOp
3613
3614     // fold (sub_sat x, 0) -> x, vector edition
3615     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3616       return N0;
3617   }
3618
3619   // fold (sub_sat x, undef) -> 0 (applies when either operand is undef)
3620   if (N0.isUndef() || N1.isUndef())
3621     return DAG.getConstant(0, DL, VT);
3622
3623   // fold (sub_sat x, x) -> 0
3624   if (N0 == N1)
3625     return DAG.getConstant(0, DL, VT);
3626
3627   // fold (sub_sat c1, c2) -> c3
3628   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3629     return C;
3630
3631   // fold (sub_sat x, 0) -> x
3632   if (isNullConstant(N1))
3633     return N0;
3634
3635   return SDValue();
3636 }
3637
3638 SDValue DAGCombiner::visitSUBC(SDNode *N) {
       // Combine a SUBC node N = (subc N0, N1). Every fold below goes through
       // CombineTo with two values: the new difference and a CARRY_FALSE node
       // of type MVT::Glue for the flag result. Returns an empty SDValue when
       // no fold applies.
3639   SDValue N0 = N->getOperand(0);
3640   SDValue N1 = N->getOperand(1);
3641   EVT VT = N0.getValueType();
3642   SDLoc DL(N);
3643
3644   // If the flag result is dead, turn this into an SUB.
3645   if (!N->hasAnyUseOfValue(1))
3646     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3647                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3648
3649   // fold (subc x, x) -> 0 + no borrow
3650   if (N0 == N1)
3651     return CombineTo(N, DAG.getConstant(0, DL, VT),
3652                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3653
3654   // fold (subc x, 0) -> x + no borrow
3655   if (isNullConstant(N1))
3656     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3657
3658   // Canonicalize (subc -1, x) -> ~x, i.e. (xor x, -1) + no borrow
       // N0 is the all-ones constant, so it is reused as the XOR mask.
3659   if (isAllOnesConstant(N0))
3660     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3661                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3662
3663   return SDValue();
3664 }
3665
3666 SDValue DAGCombiner::visitSUBO(SDNode *N) {
       // Combine a subtract-with-overflow node N = (subo N0, N1). IsSigned is
       // true when N's opcode is SSUBO; the folds guarded by !IsSigned apply to
       // any other opcode routed here. Returns the replacement value, or an
       // empty SDValue when no fold applies.
3667   SDValue N0 = N->getOperand(0);
3668   SDValue N1 = N->getOperand(1);
3669   EVT VT = N0.getValueType();
3670   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3671
3672   EVT CarryVT = N->getValueType(1);
3673   SDLoc DL(N);
3674
3675   // If the flag result is dead, turn this into an SUB.
       // The unused overflow result is replaced with UNDEF of CarryVT.
3676   if (!N->hasAnyUseOfValue(1))
3677     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3678                      DAG.getUNDEF(CarryVT));
3679
3680   // fold (subo x, x) -> 0 + no borrow
3681   if (N0 == N1)
3682     return CombineTo(N, DAG.getConstant(0, DL, VT),
3683                      DAG.getConstant(0, DL, CarryVT));
3684
3685   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3686
3687   // fold (ssubo x, c) -> (saddo x, -c)
       // Skipped when c is the minimum signed value, and done for the signed
       // opcode only.
3688   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3689     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3690                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3691   }
3692
3693   // fold (subo x, 0) -> x + no borrow
3694   if (isNullOrNullSplat(N1))
3695     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3696
3697   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3698   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3699     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3700                      DAG.getConstant(0, DL, CarryVT));
3701
3702   return SDValue();
3703 }
3704
3705 SDValue DAGCombiner::visitSUBE(SDNode *N) {
       // Combine a SUBE node N = (sube N0, N1, CarryIn). The only fold is
       // dropping a carry-in that is a CARRY_FALSE node; otherwise an empty
       // SDValue is returned.
3706   SDValue N0 = N->getOperand(0);
3707   SDValue N1 = N->getOperand(1);
3708   SDValue CarryIn = N->getOperand(2);
3709
3710   // fold (sube x, y, false) -> (subc x, y)
       // The SUBC is created with N's own value-type list.
3711   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3712     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3713
3714   return SDValue();
3715 }
3716
3717 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
       // Combine a SUBCARRY node N = (subcarry N0, N1, CarryIn). The only fold
       // is dropping a constant-zero carry-in; otherwise an empty SDValue is
       // returned.
3718   SDValue N0 = N->getOperand(0);
3719   SDValue N1 = N->getOperand(1);
3720   SDValue CarryIn = N->getOperand(2);
3721
3722   // fold (subcarry x, y, false) -> (usubo x, y)
       // When LegalOperations is set, USUBO is only formed if the target reports
       // it as legal or custom for N's first result type.
3723   if (isNullConstant(CarryIn)) {
3724     if (!LegalOperations ||
3725         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3726       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3727   }
3728
3729   return SDValue();
3730 }
3731
3732 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
       // Combine an SSUBO_CARRY node N = (ssubo_carry N0, N1, CarryIn). The only
       // fold is dropping a constant-zero carry-in; otherwise an empty SDValue
       // is returned.
3733   SDValue N0 = N->getOperand(0);
3734   SDValue N1 = N->getOperand(1);
3735   SDValue CarryIn = N->getOperand(2);
3736
3737   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
       // When LegalOperations is set, SSUBO is only formed if the target reports
       // it as legal or custom for N's first result type.
3738   if (isNullConstant(CarryIn)) {
3739     if (!LegalOperations ||
3740         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3741       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3742   }
3743
3744   return SDValue();
3745 }
3746
3747 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3748 // UMULFIXSAT here.
3749 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3750   SDValue N0 = N->getOperand(0);
3751   SDValue N1 = N->getOperand(1);
3752   SDValue Scale = N->getOperand(2);
3753   EVT VT = N0.getValueType();
3754
3755   // fold (mulfix x, undef, scale) -> 0
3756   if (N0.isUndef() || N1.isUndef())
3757     return DAG.getConstant(0, SDLoc(N), VT);
3758
3759   // Canonicalize constant to RHS (vector doesn't have to splat)
3760   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3761      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3762     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3763
3764   // fold (mulfix x, 0, scale) -> 0
3765   if (isNullConstant(N1))
3766     return DAG.getConstant(0, SDLoc(N), VT);
3767
3768   return SDValue();
3769 }
3770
/// Combine an ISD::MUL node.
///
/// The folds below are tried strictly in order; the first one that applies
/// produces the returned replacement value.  An empty SDValue is returned when
/// none of them matches.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Description of the RHS constant, filled in below:
  //  - vectors: N1IsConst is set only for a constant splat, and ConstValue1
  //    receives the splat value;
  //  - scalars: N1IsConst is set for any ConstantSDNode, ConstValue1 is its
  //    value and N1IsOpaqueConst records whether it is opaque.
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);

  // fold (mul x, 0) -> 0
  // N1 is itself the zero constant (or zero splat), so it is reused as the
  // result.
  if (N1IsConst && ConstValue1.isNullValue())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOneValue())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }

  // fold (mul x, (1 << c)) -> x << c
  // For vector types this is only done while Level <= AfterLegalizeVectorOps.
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDLoc DL(N);
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  // (The code below always emits the first form: 0 - (x << c).)
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform:
  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
  //           x * 0xf800 --> (x << 16) - (x << 11)
  //           x * -0x8800 --> -((x << 15) + (x << 11))
  //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
  // The target decides, via decomposeMulByConstant, whether this is attempted.
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    // ISD::DELETED_NODE serves as the "no decomposition found" marker.
    unsigned MathOp = ISD::DELETED_NODE;
    // Work on the magnitude; the sign is re-applied at the end.
    APInt MulC = ConstValue1.abs();
    // The constant `2` should be treated as (2^0 + 1).
    unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
    // Strip the trailing zeros so MulC can be tested against 2^N +/- 1; they
    // are added back to the shift amounts below.
    MulC.lshrInPlace(TZeros);
    if ((MulC - 1).isPowerOf2())
      MathOp = ISD::ADD;
    else if ((MulC + 1).isPowerOf2())
      MathOp = ISD::SUB;

    if (MathOp != ISD::DELETED_NODE) {
      unsigned ShAmt =
          MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
      ShAmt += TZeros;
      assert(ShAmt < VT.getScalarSizeInBits() &&
             "multiply-by-constant generated out of bounds shift");
      SDLoc DL(N);
      SDValue Shl =
          DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
      // With trailing zeros the second term is (x << TZeros) (form 2 above);
      // without them it is x itself (form 1).
      SDValue R =
          TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
                               DAG.getNode(ISD::SHL, DL, VT, N0,
                                           DAG.getConstant(TZeros, DL, VT)))
                 : DAG.getNode(MathOp, DL, VT, Shl, N0);
      // Negate the result when the original constant was negative.
      if (ConstValue1.isNegative())
        R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
      return R;
    }
  }

  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  // Only taken when the combined constant C3 comes back as a constant.
  if (N0.getOpcode() == ISD::SHL &&
      isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
      isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    if (isConstantOrConstantVector(C3))
      return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    // Sh is the matched shift operand and Y the other operand; both stay null
    // when neither operand matches.
    SDValue Sh(nullptr, 0), Y(nullptr, 0);

    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        isConstantOrConstantVector(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isConstantOrConstantVector(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // The c1*c2 product is emitted as a MUL node of the two constants.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
  if (N0.getOpcode() == ISD::VSCALE)
    if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
      const APInt &C1 = NC1->getAPIntValue();
      return DAG.getVScale(SDLoc(N), VT, C0 * C1);
    }

  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
  APInt MulVal;
  if (N0.getOpcode() == ISD::STEP_VECTOR)
    if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
      const APInt &C0 = N0.getConstantOperandAPInt(0);
      APInt NewStep = C0 * MulVal;
      return DAG.getStepVector(SDLoc(N), VT, NewStep);
    }

  // Fold ((mul x, 0/undef) -> 0,
  //       (mul x, 1) -> x) -> x)
  // -> and(x, mask)
  // We can replace vectors with '0' and '1' factors with a clearing mask.
  if (VT.isFixedLengthVector()) {
    unsigned NumElts = VT.getVectorNumElements();
    // One bit is pushed per visited element of N1; a set bit marks a lane
    // that must be cleared in the result.
    SmallBitVector ClearMask;
    ClearMask.reserve(NumElts);
    // Accepts an element only if it is null/zero (lane cleared) or one (lane
    // kept); any other value makes the match fail.
    // NOTE(review): a null V presumably corresponds to an undef element,
    // given AllowUndefs is set in the matchUnaryPredicate call - confirm.
    auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
      if (!V || V->isNullValue()) {
        ClearMask.push_back(true);
        return true;
      }
      ClearMask.push_back(false);
      return V->isOne();
    };
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
        ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
      assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
      SDLoc DL(N);
      // Build the mask from constants of the same type as N1's own operands.
      EVT LegalSVT = N1.getOperand(0).getValueType();
      SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
      SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
      SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
      for (unsigned I = 0; I != NumElts; ++I)
        if (ClearMask[I])
          Mask[I] = Zero;
      return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
    }
  }

  // reassociate mul
  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
    return RMUL;

  return SDValue();
}
3997
3998 /// Return true if divmod libcall is available.
3999 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4000                                      const TargetLowering &TLI) {
4001   RTLIB::Libcall LC;
4002   EVT NodeType = Node->getValueType(0);
4003   if (!NodeType.isSimple())
4004     return false;
4005   switch (NodeType.getSimpleVT().SimpleTy) {
4006   default: return false; // No libcall for vector types.
4007   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4008   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4009   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4010   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4011   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4012   }
4013
4014   return TLI.getLibcallName(LC) != nullptr;
4015 }
4016
4017 /// Issue divrem if both quotient and remainder are needed.
4018 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4019   if (Node->use_empty())
4020     return SDValue(); // This is a dead node, leave it alone.
4021
4022   unsigned Opcode = Node->getOpcode();
4023   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4024   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4025
4026   // DivMod lib calls can still work on non-legal types if using lib-calls.
4027   EVT VT = Node->getValueType(0);
4028   if (VT.isVector() || !VT.isInteger())
4029     return SDValue();
4030
4031   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4032     return SDValue();
4033
4034   // If DIVREM is going to get expanded into a libcall,
4035   // but there is no libcall available, then don't combine.
4036   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4037       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4038     return SDValue();
4039
4040   // If div is legal, it's better to do the normal expansion
4041   unsigned OtherOpcode = 0;
4042   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4043     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4044     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4045       return SDValue();
4046   } else {
4047     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4048     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4049       return SDValue();
4050   }
4051
4052   SDValue Op0 = Node->getOperand(0);
4053   SDValue Op1 = Node->getOperand(1);
4054   SDValue combined;
4055   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4056          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4057     SDNode *User = *UI;
4058     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4059         User->use_empty())
4060       continue;
4061     // Convert the other matching node(s), too;
4062     // otherwise, the DIVREM may get target-legalized into something
4063     // target-specific that we won't be able to recognize.
4064     unsigned UserOpc = User->getOpcode();
4065     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4066         User->getOperand(0) == Op0 &&
4067         User->getOperand(1) == Op1) {
4068       if (!combined) {
4069         if (UserOpc == OtherOpcode) {
4070           SDVTList VTs = DAG.getVTList(VT, VT);
4071           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4072         } else if (UserOpc == DivRemOpc) {
4073           combined = SDValue(User, 0);
4074         } else {
4075           assert(UserOpc == Opcode);
4076           continue;
4077         }
4078       }
4079       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4080         CombineTo(User, combined);
4081       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4082         CombineTo(User, combined.getValue(1));
4083     }
4084   }
4085   return combined;
4086 }
4087
4088 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4089   SDValue N0 = N->getOperand(0);
4090   SDValue N1 = N->getOperand(1);
4091   EVT VT = N->getValueType(0);
4092   SDLoc DL(N);
4093
4094   unsigned Opc = N->getOpcode();
4095   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4096   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4097
4098   // X / undef -> undef
4099   // X % undef -> undef
4100   // X / 0 -> undef
4101   // X % 0 -> undef
4102   // NOTE: This includes vectors where any divisor element is zero/undef.
4103   if (DAG.isUndef(Opc, {N0, N1}))
4104     return DAG.getUNDEF(VT);
4105
4106   // undef / X -> 0
4107   // undef % X -> 0
4108   if (N0.isUndef())
4109     return DAG.getConstant(0, DL, VT);
4110
4111   // 0 / X -> 0
4112   // 0 % X -> 0
4113   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4114   if (N0C && N0C->isNullValue())
4115     return N0;
4116
4117   // X / X -> 1
4118   // X % X -> 0
4119   if (N0 == N1)
4120     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4121
4122   // X / 1 -> X
4123   // X % 1 -> 0
4124   // If this is a boolean op (single-bit element type), we can't have
4125   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4126   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4127   // it's a 1.
4128   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4129     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4130
4131   return SDValue();
4132 }
4133
4134 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4135   SDValue N0 = N->getOperand(0);
4136   SDValue N1 = N->getOperand(1);
4137   EVT VT = N->getValueType(0);
4138   EVT CCVT = getSetCCResultType(VT);
4139
4140   // fold vector ops
4141   if (VT.isVector())
4142     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4143       return FoldedVOp;
4144
4145   SDLoc DL(N);
4146
4147   // fold (sdiv c1, c2) -> c1/c2
4148   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4149   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4150     return C;
4151
4152   // fold (sdiv X, -1) -> 0-X
4153   if (N1C && N1C->isAllOnesValue())
4154     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4155
4156   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4157   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4158     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4159                          DAG.getConstant(1, DL, VT),
4160                          DAG.getConstant(0, DL, VT));
4161
4162   if (SDValue V = simplifyDivRem(N, DAG))
4163     return V;
4164
4165   if (SDValue NewSel = foldBinOpIntoSelect(N))
4166     return NewSel;
4167
4168   // If we know the sign bits of both operands are zero, strength reduce to a
4169   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
4170   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4171     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4172
4173   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4174     // If the corresponding remainder node exists, update its users with
4175     // (Dividend - (Quotient * Divisor).
4176     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4177                                               { N0, N1 })) {
4178       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4179       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4180       AddToWorklist(Mul.getNode());
4181       AddToWorklist(Sub.getNode());
4182       CombineTo(RemNode, Sub);
4183     }
4184     return V;
4185   }
4186
4187   // sdiv, srem -> sdivrem
4188   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4189   // true.  Otherwise, we break the simplification logic in visitREM().
4190   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4191   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4192     if (SDValue DivRem = useDivRem(N))
4193         return DivRem;
4194
4195   return SDValue();
4196 }
4197
/// Build a cheaper replacement for the signed quotient N0 / N1.
///
/// \p N0 and \p N1 are the dividend and divisor; \p N supplies the result
/// type, node flags and debug location.  Besides visitSDIV, visitREM in this
/// file also calls this with an SREM node as \p N to obtain the quotient.
/// Returns the replacement value, or an empty SDValue when no rewrite applies.
SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  EVT CCVT = getSetCCResultType(VT);
  unsigned BitWidth = VT.getScalarSizeInBits();

  // Helper for determining whether a value is a power-2 constant scalar or a
  // vector of such elements.
  // Negated powers of two are accepted as well; zero and opaque constants are
  // rejected.
  auto IsPowerOfTwo = [](ConstantSDNode *C) {
    if (C->isNullValue() || C->isOpaque())
      return false;
    if (C->getAPIntValue().isPowerOf2())
      return true;
    if ((-C->getAPIntValue()).isPowerOf2())
      return true;
    return false;
  };

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Create constants that are functions of the shift amount value.
    // C1 = cttz(N1) and Inexact = BitWidth - C1, both in the shift-amount
    // type.  Give up if Inexact is not a constant.
    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
    if (!isConstantOrConstantVector(Inexact))
      return SDValue();

    // Splat the sign bit into the register
    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
    AddToWorklist(Sign.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // Shifting the splatted sign right by Inexact leaves C1 low one-bits for a
    // negative N0 and zero otherwise; adding that before the arithmetic shift
    // by C1 biases negative dividends.
    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
    AddToWorklist(Srl.getNode());
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
    AddToWorklist(Add.getNode());
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
    AddToWorklist(Sra.getNode());

    // Special case: (sdiv X, 1) -> X
    // Special Case: (sdiv X, -1) -> 0-X
    // For both divisors N0 is selected here; the negation for -1 comes from
    // the IsNeg select further down.
    SDValue One = DAG.getConstant(1, DL, VT);
    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);

    // If dividing by a positive value, we're done. Otherwise, the result must
    // be negated.
    SDValue Zero = DAG.getConstant(0, DL, VT);
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);

    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
    return Res;
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isConstantOrConstantVector(N1) &&
      !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  return SDValue();
}
4278
4279 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4280   SDValue N0 = N->getOperand(0);
4281   SDValue N1 = N->getOperand(1);
4282   EVT VT = N->getValueType(0);
4283   EVT CCVT = getSetCCResultType(VT);
4284
4285   // fold vector ops
4286   if (VT.isVector())
4287     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4288       return FoldedVOp;
4289
4290   SDLoc DL(N);
4291
4292   // fold (udiv c1, c2) -> c1/c2
4293   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4294   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4295     return C;
4296
4297   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4298   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4299     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4300                          DAG.getConstant(1, DL, VT),
4301                          DAG.getConstant(0, DL, VT));
4302
4303   if (SDValue V = simplifyDivRem(N, DAG))
4304     return V;
4305
4306   if (SDValue NewSel = foldBinOpIntoSelect(N))
4307     return NewSel;
4308
4309   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4310     // If the corresponding remainder node exists, update its users with
4311     // (Dividend - (Quotient * Divisor).
4312     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4313                                               { N0, N1 })) {
4314       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4315       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4316       AddToWorklist(Mul.getNode());
4317       AddToWorklist(Sub.getNode());
4318       CombineTo(RemNode, Sub);
4319     }
4320     return V;
4321   }
4322
4323   // sdiv, srem -> sdivrem
4324   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4325   // true.  Otherwise, we break the simplification logic in visitREM().
4326   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4327   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4328     if (SDValue DivRem = useDivRem(N))
4329         return DivRem;
4330
4331   return SDValue();
4332 }
4333
4334 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4335   SDLoc DL(N);
4336   EVT VT = N->getValueType(0);
4337
4338   // fold (udiv x, (1 << c)) -> x >>u c
4339   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4340       DAG.isKnownToBeAPowerOfTwo(N1)) {
4341     SDValue LogBase2 = BuildLogBase2(N1, DL);
4342     AddToWorklist(LogBase2.getNode());
4343
4344     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4345     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4346     AddToWorklist(Trunc.getNode());
4347     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4348   }
4349
4350   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4351   if (N1.getOpcode() == ISD::SHL) {
4352     SDValue N10 = N1.getOperand(0);
4353     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4354         DAG.isKnownToBeAPowerOfTwo(N10)) {
4355       SDValue LogBase2 = BuildLogBase2(N10, DL);
4356       AddToWorklist(LogBase2.getNode());
4357
4358       EVT ADDVT = N1.getOperand(1).getValueType();
4359       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4360       AddToWorklist(Trunc.getNode());
4361       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4362       AddToWorklist(Add.getNode());
4363       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4364     }
4365   }
4366
4367   // fold (udiv x, c) -> alternate
4368   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4369   if (isConstantOrConstantVector(N1) &&
4370       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4371     if (SDValue Op = BuildUDIV(N))
4372       return Op;
4373
4374   return SDValue();
4375 }
4376
4377 // handles ISD::SREM and ISD::UREM
4378 SDValue DAGCombiner::visitREM(SDNode *N) {
4379   unsigned Opcode = N->getOpcode();
4380   SDValue N0 = N->getOperand(0);
4381   SDValue N1 = N->getOperand(1);
4382   EVT VT = N->getValueType(0);
4383   EVT CCVT = getSetCCResultType(VT);
4384
4385   bool isSigned = (Opcode == ISD::SREM);
4386   SDLoc DL(N);
4387
4388   // fold (rem c1, c2) -> c1%c2
4389   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4390   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4391     return C;
4392
4393   // fold (urem X, -1) -> select(X == -1, 0, x)
4394   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4395     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4396                          DAG.getConstant(0, DL, VT), N0);
4397
4398   if (SDValue V = simplifyDivRem(N, DAG))
4399     return V;
4400
4401   if (SDValue NewSel = foldBinOpIntoSelect(N))
4402     return NewSel;
4403
4404   if (isSigned) {
4405     // If we know the sign bits of both operands are zero, strength reduce to a
4406     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4407     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4408       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4409   } else {
4410     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4411       // fold (urem x, pow2) -> (and x, pow2-1)
4412       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4413       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4414       AddToWorklist(Add.getNode());
4415       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4416     }
4417     if (N1.getOpcode() == ISD::SHL &&
4418         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4419       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4420       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4421       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4422       AddToWorklist(Add.getNode());
4423       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4424     }
4425   }
4426
4427   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4428
4429   // If X/C can be simplified by the division-by-constant logic, lower
4430   // X%C to the equivalent of X-X/C*C.
4431   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4432   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4433   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4434   // combine will not return a DIVREM.  Regardless, checking cheapness here
4435   // makes sense since the simplification results in fatter code.
4436   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4437     SDValue OptimizedDiv =
4438         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4439     if (OptimizedDiv.getNode()) {
4440       // If the equivalent Div node also exists, update its users.
4441       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4442       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4443                                                 { N0, N1 }))
4444         CombineTo(DivNode, OptimizedDiv);
4445       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4446       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4447       AddToWorklist(OptimizedDiv.getNode());
4448       AddToWorklist(Mul.getNode());
4449       return Sub;
4450     }
4451   }
4452
4453   // sdiv, srem -> sdivrem
4454   if (SDValue DivRem = useDivRem(N))
4455     return DivRem.getValue(1);
4456
4457   return SDValue();
4458 }
4459
4460 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4461   SDValue N0 = N->getOperand(0);
4462   SDValue N1 = N->getOperand(1);
4463   EVT VT = N->getValueType(0);
4464   SDLoc DL(N);
4465
4466   if (VT.isVector()) {
4467     // fold (mulhs x, 0) -> 0
4468     // do not return N0/N1, because undef node may exist.
4469     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4470         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4471       return DAG.getConstant(0, DL, VT);
4472   }
4473
4474   // fold (mulhs c1, c2)
4475   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4476     return C;
4477
4478   // canonicalize constant to RHS.
4479   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4480       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4481     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4482
4483   // fold (mulhs x, 0) -> 0
4484   if (isNullConstant(N1))
4485     return N1;
4486   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4487   if (isOneConstant(N1))
4488     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4489                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4490                                        getShiftAmountTy(N0.getValueType())));
4491
4492   // fold (mulhs x, undef) -> 0
4493   if (N0.isUndef() || N1.isUndef())
4494     return DAG.getConstant(0, DL, VT);
4495
4496   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4497   // plus a shift.
4498   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4499       !VT.isVector()) {
4500     MVT Simple = VT.getSimpleVT();
4501     unsigned SimpleSize = Simple.getSizeInBits();
4502     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4503     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4504       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4505       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4506       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4507       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4508             DAG.getConstant(SimpleSize, DL,
4509                             getShiftAmountTy(N1.getValueType())));
4510       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4511     }
4512   }
4513
4514   return SDValue();
4515 }
4516
4517 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4518   SDValue N0 = N->getOperand(0);
4519   SDValue N1 = N->getOperand(1);
4520   EVT VT = N->getValueType(0);
4521   SDLoc DL(N);
4522
4523   if (VT.isVector()) {
4524     // fold (mulhu x, 0) -> 0
4525     // do not return N0/N1, because undef node may exist.
4526     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4527         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4528       return DAG.getConstant(0, DL, VT);
4529   }
4530
4531   // fold (mulhu c1, c2)
4532   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4533     return C;
4534
4535   // canonicalize constant to RHS.
4536   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4537       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4538     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4539
4540   // fold (mulhu x, 0) -> 0
4541   if (isNullConstant(N1))
4542     return N1;
4543   // fold (mulhu x, 1) -> 0
4544   if (isOneConstant(N1))
4545     return DAG.getConstant(0, DL, N0.getValueType());
4546   // fold (mulhu x, undef) -> 0
4547   if (N0.isUndef() || N1.isUndef())
4548     return DAG.getConstant(0, DL, VT);
4549
4550   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4551   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4552       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4553     unsigned NumEltBits = VT.getScalarSizeInBits();
4554     SDValue LogBase2 = BuildLogBase2(N1, DL);
4555     SDValue SRLAmt = DAG.getNode(
4556         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4557     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4558     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4559     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4560   }
4561
4562   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4563   // plus a shift.
4564   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4565       !VT.isVector()) {
4566     MVT Simple = VT.getSimpleVT();
4567     unsigned SimpleSize = Simple.getSizeInBits();
4568     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4569     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4570       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4571       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4572       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4573       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4574             DAG.getConstant(SimpleSize, DL,
4575                             getShiftAmountTy(N1.getValueType())));
4576       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4577     }
4578   }
4579
4580   // Simplify the operands using demanded-bits information.
4581   // We don't have demanded bits support for MULHU so this just enables constant
4582   // folding based on known bits.
4583   if (SimplifyDemandedBits(SDValue(N, 0)))
4584     return SDValue(N, 0);
4585
4586   return SDValue();
4587 }
4588
4589 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4590 /// give the opcodes for the two computations that are being performed. Return
4591 /// true if a simplification was made.
4592 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4593                                                 unsigned HiOp) {
4594   // If the high half is not needed, just compute the low half.
4595   bool HiExists = N->hasAnyUseOfValue(1);
4596   if (!HiExists && (!LegalOperations ||
4597                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4598     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4599     return CombineTo(N, Res, Res);
4600   }
4601
4602   // If the low half is not needed, just compute the high half.
4603   bool LoExists = N->hasAnyUseOfValue(0);
4604   if (!LoExists && (!LegalOperations ||
4605                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4606     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4607     return CombineTo(N, Res, Res);
4608   }
4609
4610   // If both halves are used, return as it is.
4611   if (LoExists && HiExists)
4612     return SDValue();
4613
4614   // If the two computed results can be simplified separately, separate them.
4615   if (LoExists) {
4616     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4617     AddToWorklist(Lo.getNode());
4618     SDValue LoOpt = combine(Lo.getNode());
4619     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4620         (!LegalOperations ||
4621          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4622       return CombineTo(N, LoOpt, LoOpt);
4623   }
4624
4625   if (HiExists) {
4626     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4627     AddToWorklist(Hi.getNode());
4628     SDValue HiOpt = combine(Hi.getNode());
4629     if (HiOpt.getNode() && HiOpt != Hi &&
4630         (!LegalOperations ||
4631          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4632       return CombineTo(N, HiOpt, HiOpt);
4633   }
4634
4635   return SDValue();
4636 }
4637
4638 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4639   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4640     return Res;
4641
4642   EVT VT = N->getValueType(0);
4643   SDLoc DL(N);
4644
4645   // If the type is twice as wide is legal, transform the mulhu to a wider
4646   // multiply plus a shift.
4647   if (VT.isSimple() && !VT.isVector()) {
4648     MVT Simple = VT.getSimpleVT();
4649     unsigned SimpleSize = Simple.getSizeInBits();
4650     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4651     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4652       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4653       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4654       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4655       // Compute the high part as N1.
4656       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4657             DAG.getConstant(SimpleSize, DL,
4658                             getShiftAmountTy(Lo.getValueType())));
4659       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4660       // Compute the low part as N0.
4661       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4662       return CombineTo(N, Lo, Hi);
4663     }
4664   }
4665
4666   return SDValue();
4667 }
4668
4669 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4670   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4671     return Res;
4672
4673   EVT VT = N->getValueType(0);
4674   SDLoc DL(N);
4675
4676   // (umul_lohi N0, 0) -> (0, 0)
4677   if (isNullConstant(N->getOperand(1))) {
4678     SDValue Zero = DAG.getConstant(0, DL, VT);
4679     return CombineTo(N, Zero, Zero);
4680   }
4681
4682   // (umul_lohi N0, 1) -> (N0, 0)
4683   if (isOneConstant(N->getOperand(1))) {
4684     SDValue Zero = DAG.getConstant(0, DL, VT);
4685     return CombineTo(N, N->getOperand(0), Zero);
4686   }
4687
4688   // If the type is twice as wide is legal, transform the mulhu to a wider
4689   // multiply plus a shift.
4690   if (VT.isSimple() && !VT.isVector()) {
4691     MVT Simple = VT.getSimpleVT();
4692     unsigned SimpleSize = Simple.getSizeInBits();
4693     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4694     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4695       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4696       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4697       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4698       // Compute the high part as N1.
4699       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4700             DAG.getConstant(SimpleSize, DL,
4701                             getShiftAmountTy(Lo.getValueType())));
4702       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4703       // Compute the low part as N0.
4704       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4705       return CombineTo(N, Lo, Hi);
4706     }
4707   }
4708
4709   return SDValue();
4710 }
4711
4712 SDValue DAGCombiner::visitMULO(SDNode *N) {
4713   SDValue N0 = N->getOperand(0);
4714   SDValue N1 = N->getOperand(1);
4715   EVT VT = N0.getValueType();
4716   bool IsSigned = (ISD::SMULO == N->getOpcode());
4717
4718   EVT CarryVT = N->getValueType(1);
4719   SDLoc DL(N);
4720
4721   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4722   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4723
4724   // fold operation with constant operands.
4725   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4726   // multiple results.
4727   if (N0C && N1C) {
4728     bool Overflow;
4729     APInt Result =
4730         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4731                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4732     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4733                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4734   }
4735
4736   // canonicalize constant to RHS.
4737   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4738       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4739     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4740
4741   // fold (mulo x, 0) -> 0 + no carry out
4742   if (isNullOrNullSplat(N1))
4743     return CombineTo(N, DAG.getConstant(0, DL, VT),
4744                      DAG.getConstant(0, DL, CarryVT));
4745
4746   // (mulo x, 2) -> (addo x, x)
4747   if (N1C && N1C->getAPIntValue() == 2)
4748     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4749                        N->getVTList(), N0, N0);
4750
4751   if (IsSigned) {
4752     // A 1 bit SMULO overflows if both inputs are 1.
4753     if (VT.getScalarSizeInBits() == 1) {
4754       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4755       return CombineTo(N, And,
4756                        DAG.getSetCC(DL, CarryVT, And,
4757                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4758     }
4759
4760     // Multiplying n * m significant bits yields a result of n + m significant
4761     // bits. If the total number of significant bits does not exceed the
4762     // result bit width (minus 1), there is no overflow.
4763     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4764     if (SignBits > 1)
4765       SignBits += DAG.ComputeNumSignBits(N1);
4766     if (SignBits > VT.getScalarSizeInBits() + 1)
4767       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4768                        DAG.getConstant(0, DL, CarryVT));
4769   } else {
4770     KnownBits N1Known = DAG.computeKnownBits(N1);
4771     KnownBits N0Known = DAG.computeKnownBits(N0);
4772     bool Overflow;
4773     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4774     if (!Overflow)
4775       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4776                        DAG.getConstant(0, DL, CarryVT));
4777   }
4778
4779   return SDValue();
4780 }
4781
4782 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4783   SDValue N0 = N->getOperand(0);
4784   SDValue N1 = N->getOperand(1);
4785   EVT VT = N0.getValueType();
4786   unsigned Opcode = N->getOpcode();
4787
4788   // fold vector ops
4789   if (VT.isVector())
4790     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4791       return FoldedVOp;
4792
4793   // fold operation with constant operands.
4794   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4795     return C;
4796
4797   // canonicalize constant to RHS
4798   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4799       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4800     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4801
4802   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4803   // Only do this if the current op isn't legal and the flipped is.
4804   if (!TLI.isOperationLegal(Opcode, VT) &&
4805       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4806       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4807     unsigned AltOpcode;
4808     switch (Opcode) {
4809     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4810     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4811     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4812     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4813     default: llvm_unreachable("Unknown MINMAX opcode");
4814     }
4815     if (TLI.isOperationLegal(AltOpcode, VT))
4816       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4817   }
4818
4819   // Simplify the operands using demanded-bits information.
4820   if (SimplifyDemandedBits(SDValue(N, 0)))
4821     return SDValue(N, 0);
4822
4823   return SDValue();
4824 }
4825
4826 /// If this is a bitwise logic instruction and both operands have the same
4827 /// opcode, try to sink the other opcode after the logic instruction.
4828 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4829   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4830   EVT VT = N0.getValueType();
4831   unsigned LogicOpcode = N->getOpcode();
4832   unsigned HandOpcode = N0.getOpcode();
4833   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4834           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4835   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4836
4837   // Bail early if none of these transforms apply.
4838   if (N0.getNumOperands() == 0)
4839     return SDValue();
4840
4841   // FIXME: We should check number of uses of the operands to not increase
4842   //        the instruction count for all transforms.
4843
4844   // Handle size-changing casts.
4845   SDValue X = N0.getOperand(0);
4846   SDValue Y = N1.getOperand(0);
4847   EVT XVT = X.getValueType();
4848   SDLoc DL(N);
4849   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4850       HandOpcode == ISD::SIGN_EXTEND) {
4851     // If both operands have other uses, this transform would create extra
4852     // instructions without eliminating anything.
4853     if (!N0.hasOneUse() && !N1.hasOneUse())
4854       return SDValue();
4855     // We need matching integer source types.
4856     if (XVT != Y.getValueType())
4857       return SDValue();
4858     // Don't create an illegal op during or after legalization. Don't ever
4859     // create an unsupported vector op.
4860     if ((VT.isVector() || LegalOperations) &&
4861         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4862       return SDValue();
4863     // Avoid infinite looping with PromoteIntBinOp.
4864     // TODO: Should we apply desirable/legal constraints to all opcodes?
4865     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4866         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4867       return SDValue();
4868     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4869     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4870     return DAG.getNode(HandOpcode, DL, VT, Logic);
4871   }
4872
4873   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4874   if (HandOpcode == ISD::TRUNCATE) {
4875     // If both operands have other uses, this transform would create extra
4876     // instructions without eliminating anything.
4877     if (!N0.hasOneUse() && !N1.hasOneUse())
4878       return SDValue();
4879     // We need matching source types.
4880     if (XVT != Y.getValueType())
4881       return SDValue();
4882     // Don't create an illegal op during or after legalization.
4883     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4884       return SDValue();
4885     // Be extra careful sinking truncate. If it's free, there's no benefit in
4886     // widening a binop. Also, don't create a logic op on an illegal type.
4887     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4888       return SDValue();
4889     if (!TLI.isTypeLegal(XVT))
4890       return SDValue();
4891     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4892     return DAG.getNode(HandOpcode, DL, VT, Logic);
4893   }
4894
4895   // For binops SHL/SRL/SRA/AND:
4896   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4897   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4898        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4899       N0.getOperand(1) == N1.getOperand(1)) {
4900     // If either operand has other uses, this transform is not an improvement.
4901     if (!N0.hasOneUse() || !N1.hasOneUse())
4902       return SDValue();
4903     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4904     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4905   }
4906
4907   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4908   if (HandOpcode == ISD::BSWAP) {
4909     // If either operand has other uses, this transform is not an improvement.
4910     if (!N0.hasOneUse() || !N1.hasOneUse())
4911       return SDValue();
4912     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4913     return DAG.getNode(HandOpcode, DL, VT, Logic);
4914   }
4915
4916   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4917   // Only perform this optimization up until type legalization, before
4918   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4919   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4920   // we don't want to undo this promotion.
4921   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4922   // on scalars.
4923   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4924        Level <= AfterLegalizeTypes) {
4925     // Input types must be integer and the same.
4926     if (XVT.isInteger() && XVT == Y.getValueType() &&
4927         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4928           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4929       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4930       return DAG.getNode(HandOpcode, DL, VT, Logic);
4931     }
4932   }
4933
4934   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4935   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4936   // If both shuffles use the same mask, and both shuffle within a single
4937   // vector, then it is worthwhile to move the swizzle after the operation.
4938   // The type-legalizer generates this pattern when loading illegal
4939   // vector types from memory. In many cases this allows additional shuffle
4940   // optimizations.
4941   // There are other cases where moving the shuffle after the xor/and/or
4942   // is profitable even if shuffles don't perform a swizzle.
4943   // If both shuffles use the same mask, and both shuffles have the same first
4944   // or second operand, then it might still be profitable to move the shuffle
4945   // after the xor/and/or operation.
4946   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4947     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4948     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4949     assert(X.getValueType() == Y.getValueType() &&
4950            "Inputs to shuffles are not the same type");
4951
4952     // Check that both shuffles use the same mask. The masks are known to be of
4953     // the same length because the result vector type is the same.
4954     // Check also that shuffles have only one use to avoid introducing extra
4955     // instructions.
4956     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4957         !SVN0->getMask().equals(SVN1->getMask()))
4958       return SDValue();
4959
4960     // Don't try to fold this node if it requires introducing a
4961     // build vector of all zeros that might be illegal at this stage.
4962     SDValue ShOp = N0.getOperand(1);
4963     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4964       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4965
4966     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4967     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4968       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4969                                   N0.getOperand(0), N1.getOperand(0));
4970       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4971     }
4972
4973     // Don't try to fold this node if it requires introducing a
4974     // build vector of all zeros that might be illegal at this stage.
4975     ShOp = N0.getOperand(0);
4976     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4977       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4978
4979     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4980     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4981       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4982                                   N1.getOperand(1));
4983       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4984     }
4985   }
4986
4987   return SDValue();
4988 }
4989
4990 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4991 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4992                                        const SDLoc &DL) {
4993   SDValue LL, LR, RL, RR, N0CC, N1CC;
4994   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4995       !isSetCCEquivalent(N1, RL, RR, N1CC))
4996     return SDValue();
4997
4998   assert(N0.getValueType() == N1.getValueType() &&
4999          "Unexpected operand types for bitwise logic op");
5000   assert(LL.getValueType() == LR.getValueType() &&
5001          RL.getValueType() == RR.getValueType() &&
5002          "Unexpected operand types for setcc");
5003
5004   // If we're here post-legalization or the logic op type is not i1, the logic
5005   // op type must match a setcc result type. Also, all folds require new
5006   // operations on the left and right operands, so those types must match.
5007   EVT VT = N0.getValueType();
5008   EVT OpVT = LL.getValueType();
5009   if (LegalOperations || VT.getScalarType() != MVT::i1)
5010     if (VT != getSetCCResultType(OpVT))
5011       return SDValue();
5012   if (OpVT != RL.getValueType())
5013     return SDValue();
5014
5015   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5016   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5017   bool IsInteger = OpVT.isInteger();
5018   if (LR == RR && CC0 == CC1 && IsInteger) {
5019     bool IsZero = isNullOrNullSplat(LR);
5020     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5021
5022     // All bits clear?
5023     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5024     // All sign bits clear?
5025     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5026     // Any bits set?
5027     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5028     // Any sign bits set?
5029     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5030
5031     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5032     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5033     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5034     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5035     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5036       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5037       AddToWorklist(Or.getNode());
5038       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5039     }
5040
5041     // All bits set?
5042     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5043     // All sign bits set?
5044     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5045     // Any bits clear?
5046     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5047     // Any sign bits clear?
5048     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5049
5050     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5051     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5052     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5053     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
5054     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5055       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5056       AddToWorklist(And.getNode());
5057       return DAG.getSetCC(DL, VT, And, LR, CC1);
5058     }
5059   }
5060
5061   // TODO: What is the 'or' equivalent of this fold?
5062   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5063   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5064       IsInteger && CC0 == ISD::SETNE &&
5065       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5066        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5067     SDValue One = DAG.getConstant(1, DL, OpVT);
5068     SDValue Two = DAG.getConstant(2, DL, OpVT);
5069     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5070     AddToWorklist(Add.getNode());
5071     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5072   }
5073
5074   // Try more general transforms if the predicates match and the only user of
5075   // the compares is the 'and' or 'or'.
5076   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5077       N0.hasOneUse() && N1.hasOneUse()) {
5078     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5079     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5080     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5081       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5082       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5083       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5084       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5085       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5086     }
5087
5088     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5089     // TODO - support non-uniform vector amounts.
5090     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5091       // Match a shared variable operand and 2 non-opaque constant operands.
5092       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5093       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5094       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5095         const APInt &CMax =
5096             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5097         const APInt &CMin =
5098             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5099         // The difference of the constants must be a single bit.
5100         if ((CMax - CMin).isPowerOf2()) {
5101           // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5102           // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
5103           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5104           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5105           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5106           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5107           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5108           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5109           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5110           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5111         }
5112       }
5113     }
5114   }
5115
5116   // Canonicalize equivalent operands to LL == RL.
5117   if (LL == RR && LR == RL) {
5118     CC1 = ISD::getSetCCSwappedOperands(CC1);
5119     std::swap(RL, RR);
5120   }
5121
5122   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5123   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5124   if (LL == RL && LR == RR) {
5125     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5126                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5127     if (NewCC != ISD::SETCC_INVALID &&
5128         (!LegalOperations ||
5129          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5130           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5131       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5132   }
5133
5134   return SDValue();
5135 }
5136
5137 /// This contains all DAGCombine rules which reduce two values combined by
5138 /// an And operation to a single value. This makes them reusable in the context
5139 /// of visitSELECT(). Rules involving constants are not included as
5140 /// visitSELECT() already handles those cases.
5141 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5142   EVT VT = N1.getValueType();
5143   SDLoc DL(N);
5144
5145   // fold (and x, undef) -> 0
5146   if (N0.isUndef() || N1.isUndef())
5147     return DAG.getConstant(0, DL, VT);
5148
5149   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5150     return V;
5151
5152   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5153       VT.getSizeInBits() <= 64) {
5154     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5155       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5156         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5157         // immediate for an add, but it is legal if its top c2 bits are set,
5158         // transform the ADD so the immediate doesn't need to be materialized
5159         // in a register.
5160         APInt ADDC = ADDI->getAPIntValue();
5161         APInt SRLC = SRLI->getAPIntValue();
5162         if (ADDC.getMinSignedBits() <= 64 &&
5163             SRLC.ult(VT.getSizeInBits()) &&
5164             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5165           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5166                                              SRLC.getZExtValue());
5167           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5168             ADDC |= Mask;
5169             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5170               SDLoc DL0(N0);
5171               SDValue NewAdd =
5172                 DAG.getNode(ISD::ADD, DL0, VT,
5173                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5174               CombineTo(N0.getNode(), NewAdd);
5175               // Return N so it doesn't get rechecked!
5176               return SDValue(N, 0);
5177             }
5178           }
5179         }
5180       }
5181     }
5182   }
5183
5184   // Reduce bit extract of low half of an integer to the narrower type.
5185   // (and (srl i64:x, K), KMask) ->
5186   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
5187   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5188     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5189       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5190         unsigned Size = VT.getSizeInBits();
5191         const APInt &AndMask = CAnd->getAPIntValue();
5192         unsigned ShiftBits = CShift->getZExtValue();
5193
5194         // Bail out, this node will probably disappear anyway.
5195         if (ShiftBits == 0)
5196           return SDValue();
5197
5198         unsigned MaskBits = AndMask.countTrailingOnes();
5199         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5200
5201         if (AndMask.isMask() &&
5202             // Required bits must not span the two halves of the integer and
5203             // must fit in the half size type.
5204             (ShiftBits + MaskBits <= Size / 2) &&
5205             TLI.isNarrowingProfitable(VT, HalfVT) &&
5206             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5207             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5208             TLI.isTruncateFree(VT, HalfVT) &&
5209             TLI.isZExtFree(HalfVT, VT)) {
5210           // The isNarrowingProfitable is to avoid regressions on PPC and
5211           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5212           // on downstream users of this. Those patterns could probably be
5213           // extended to handle extensions mixed in.
5214
5215           SDValue SL(N0);
5216           assert(MaskBits <= Size);
5217
5218           // Extracting the highest bit of the low half.
5219           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5220           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5221                                       N0.getOperand(0));
5222
5223           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5224           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5225           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5226           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5227           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5228         }
5229       }
5230     }
5231   }
5232
5233   return SDValue();
5234 }
5235
5236 /// Decide whether ANDing \p LoadN with the constant \p AndC can be matched as
5237 /// a zero-extending load. Once the mask check passes, \p ExtVT is set to the
5238 /// integer type that is as wide as the run of low ones in the mask.
5239 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5240                                    EVT LoadResultTy, EVT &ExtVT) {
5241   // Anything that is not a run of low set bits is rejected outright.
5242   const APInt &MaskBits = AndC->getAPIntValue();
5243   if (!MaskBits.isMask())
5244     return false;
5245
5246   ExtVT = EVT::getIntegerVT(*DAG.getContext(), MaskBits.countTrailingOnes());
5247   const EVT MemTy = LoadN->getMemoryVT();
5248
5249   // With LegalOperations unset any ZEXTLOAD is accepted; otherwise the target
5250   // must report it legal for this result type / memory type pair.
5251   auto ZExtLoadAllowed = [&]() {
5252     return !LegalOperations ||
5253            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT);
5254   };
5255
5256   // Same memory type: a ZEXTLOAD matches without resizing the loaded value.
5257   if (ExtVT == MemTy && ZExtLoadAllowed())
5258     return true;
5259
5260   // The width of a volatile or atomic load must not be changed.
5261   if (!LoadN->isSimple())
5262     return false;
5263
5264   // The access has to shrink, and only to a round integer type: loads of
5265   // other types can be expensive (and wrong if the type is not byte sized).
5266   if (!MemTy.bitsGT(ExtVT) || !ExtVT.isRound())
5267     return false;
5268
5269   return ZExtLoadAllowed() &&
5270          TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT);
5271 }
5272
5273 /// Check whether the load or store \p LDST may be replaced by an access of the
5274 /// narrower type \p MemVT that starts \p ShAmt bits into the original one.
5275 /// \p ExtType is the extension kind a narrowed load would have to use.
5276 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5277                                     ISD::LoadExtType ExtType, EVT &MemVT,
5278                                     unsigned ShAmt) {
5279   if (!LDST)
5280     return false;
5281
5282   // The narrow access must begin on a byte boundary.
5283   if (ShAmt % 8 != 0)
5284     return false;
5285
5286   // Accesses of non-round integer types can be expensive (and would be wrong
5287   // if the type is not byte sized), so never create them.
5288   if (!MemVT.isRound())
5289     return false;
5290
5291   // The width of a volatile or atomic access must stay as it is.
5292   if (!LDST->isSimple())
5293     return false;
5294
5295   const EVT OrigMemVT = LDST->getMemoryVT();
5296
5297   // If only one of the two types is a scalable vector we cannot be sure that
5298   // the new access is really narrower.
5299   if (OrigMemVT.isScalableVector() != MemVT.isScalableVector())
5300     return false;
5301
5302   // The requested type must not be wider than what is accessed today.
5303   if (OrigMemVT.bitsLT(MemVT))
5304     return false;
5305
5306   // An access at a non-zero offset may be less aligned than the original;
5307   // make sure the target still supports it.
5308   if (ShAmt != 0) {
5309     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5310     const Align OffsetAlign = commonAlignment(LDST->getAlign(), ShAmt / 8);
5311     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5312                                 LDST->getAddressSpace(), OffsetAlign,
5313                                 LDST->getMemOperand()->getFlags()))
5314       return false;
5315   }
5316
5317   // A constant of extended or untyped type cannot be generated, so such
5318   // pointer types are rejected.
5319   const EVT PtrVT = LDST->getBasePtr().getValueType();
5320   if (PtrVT == MVT::Untyped || PtrVT.isExtended())
5321     return false;
5322
5323   if (auto *Load = dyn_cast<LoadSDNode>(LDST)) {
5324     // A load with several users would have to be kept next to the new one.
5325     if (!SDValue(Load, 0).hasOneUse())
5326       return false;
5327
5328     if (LegalOperations &&
5329         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5330       return false;
5331
5332     // Only loads producing exactly the loaded value and the chain can be
5333     // handled. A pre-increment load, for example, yields an extra value; the
5334     // transformation would not be equivalent and the downstream logic that
5335     // replaces uses would get things wrong.
5336     if (Load->getNumValues() > 2)
5337       return false;
5338
5339     // An extending load can only be shrunk when the narrow access lies fully
5340     // inside the bits that are really read from memory, i.e. when we merely
5341     // discard the extension.
5342     // TODO: It would be possible to merge the extensions in some cases.
5343     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5344         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5345       return false;
5346
5347     return TLI.shouldReduceLoadWidth(Load, ExtType, MemVT);
5348   }
5349
5350   assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5351   auto *Store = cast<StoreSDNode>(LDST);
5352
5353   // The narrow store must not write outside the original store.
5354   if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5355     return false;
5356
5357   return !LegalOperations ||
5358          TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT);
5359 }
5360 /// Search the operand tree of \p N for loads that \p Mask can narrow.
5361 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5362                                     SmallVectorImpl<LoadSDNode*> &Loads,
5363                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5364                                     ConstantSDNode *Mask,
5365                                     SDNode *&NodeToMask) {
5366   // Recursively search the operands, looking for loads which can be narrowed.
5367   // A false result means that some operand cannot be handled.
5368   for (SDValue Op : N->op_values()) {
5369     if (Op.getValueType().isVector())
5370       return false;
5371
5372     // OR/XOR constants with bits outside the mask need fixing up later.
5373     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5374       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5375           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5376         NodesWithConsts.insert(N);
5377       continue;
5378     }
5379     // Give up on non-constant operands that have other users.
5380     if (!Op.hasOneUse())
5381       return false;
5382
5383     switch(Op.getOpcode()) {
5384     case ISD::LOAD: {
5385       auto *Load = cast<LoadSDNode>(Op);
5386       EVT ExtVT;
5387       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5388           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5389
5390         // ZEXTLOAD is already small enough.
5391         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5392             ExtVT.bitsGE(Load->getMemoryVT()))
5393           continue;
5394
5395         // Use LE to convert equal sized loads to zext.
5396         if (ExtVT.bitsLE(Load->getMemoryVT()))
5397           Loads.push_back(Load);
5398
5399         continue;
5400       }
5401       return false;
5402     }
5403     case ISD::ZERO_EXTEND:
5404     case ISD::AssertZext: {
5405       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5406       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5407       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5408         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5409         Op.getOperand(0).getValueType();
5410
5411       // We can accept extending nodes if the mask is wider or an equal
5412       // width to the original type. Otherwise fall out of the switch so the
5413       if (ExtVT.bitsGE(VT))
5414         continue;
5415       break; // node is treated as the single node that needs masking.
5416     }
5417     case ISD::OR:
5418     case ISD::XOR:
5419     case ISD::AND:
5420       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5421                              NodeToMask))
5422         return false;
5423       continue;
5424     }
5425
5426     // Allow one node which will be masked along with any loads found.
5427     if (NodeToMask)
5428       return false;
5429
5430     // Also ensure that the node to be masked only produces one data result.
5431     NodeToMask = Op.getNode();
5432     if (NodeToMask->getNumValues() > 1) {
5433       bool HasValue = false;
5434       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5435         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5436         if (VT != MVT::Glue && VT != MVT::Other) {
5437           if (HasValue) {
5438             NodeToMask = nullptr;
5439             return false;
5440           }
5441           HasValue = true;
5442         }
5443       }
5444       assert(HasValue && "Node to be masked has no data result?");
5445     }
5446   }
5447   return true;
5448 }
5449 /// Try to narrow the loads feeding the AND \p N so that the AND can be removed.
5450 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5451   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5452   if (!Mask)
5453     return false;
5454   // Only constant masks made of a run of low set bits are handled.
5455   if (!Mask->getAPIntValue().isMask())
5456     return false;
5457
5458   // No need to do anything if the and directly uses a load.
5459   if (isa<LoadSDNode>(N->getOperand(0)))
5460     return false;
5461
5462   SmallVector<LoadSDNode*, 8> Loads;
5463   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5464   SDNode *FixupNode = nullptr;
5465   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5466     if (Loads.size() == 0)
5467       return false;
5468
5469     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5470     SDValue MaskOp = N->getOperand(1);
5471
5472     // If it exists, fixup the single node we allow in the tree that needs
5473     // masking.
5474     if (FixupNode) {
5475       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5476       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5477                                 FixupNode->getValueType(0),
5478                                 SDValue(FixupNode, 0), MaskOp);
5479       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5480       if (And.getOpcode() == ISD ::AND)
5481         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5482     }
5483
5484     // Narrow any constants that need it by ANDing them with the mask.
5485     for (auto *LogicN : NodesWithConsts) {
5486       SDValue Op0 = LogicN->getOperand(0);
5487       SDValue Op1 = LogicN->getOperand(1);
5488
5489       if (isa<ConstantSDNode>(Op0))
5490           std::swap(Op0, Op1);
5491
5492       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5493                                 Op1, MaskOp);
5494
5495       DAG.UpdateNodeOperands(LogicN, Op0, And);
5496     }
5497
5498     // Create narrow loads: mask each load, then let ReduceLoadWidth shrink it.
5499     for (auto *Load : Loads) {
5500       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5501       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5502                                 SDValue(Load, 0), MaskOp);
5503       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5504       if (And.getOpcode() == ISD ::AND)
5505         And = SDValue(
5506             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5507       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5508       assert(NewLoad &&
5509              "Shouldn't be masking the load if it can't be narrowed");
5510       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5511     }
5512     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5513     return true;
5514   }
5515   return false;
5516 }
5517
5518 // Unfold
5519 //    x &  (-1 'logical shift' y)
5520 // To
5521 //    (x 'opposite logical shift' y) 'logical shift' y
5522 // when the target reports that it prefers the shift pair over the mask.
5523 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5524   assert(N->getOpcode() == ISD::AND);
5525
5526   const SDValue LHS = N->getOperand(0);
5527   const SDValue RHS = N->getOperand(1);
5528
5529   // Keep the mask unless the target asks for shifts instead.
5530   if (!TLI.shouldFoldMaskToVariableShiftPair(LHS))
5531     return SDValue();
5532
5533   // Description of a matched  (-1 '[outer] logical shift' y)  operand.
5534   unsigned MaskShiftOpc = 0; // The shift that builds the mask.
5535   unsigned UndoShiftOpc = 0; // The shift in the opposite direction.
5536   SDValue ShAmt;             // y, the shift amount.
5537
5538   // Returns true, and fills in the description above, if M is a single-use
5539   // SHL or SRL whose shifted value is the all-ones constant.
5540   auto IsShiftedAllOnes = [&](SDValue M) {
5541     if (!M.hasOneUse())
5542       return false;
5543     const unsigned Opc = M->getOpcode();
5544     if (Opc != ISD::SHL && Opc != ISD::SRL)
5545       return false;
5546
5547     if (!isAllOnesConstant(M->getOperand(0)))
5548       return false;
5549
5550     MaskShiftOpc = Opc;
5551     UndoShiftOpc = Opc == ISD::SHL ? ISD::SRL : ISD::SHL;
5552     ShAmt = M->getOperand(1);
5553     return true;
5554   };
5555
5556   // The mask may sit on either side of the AND; the right operand is tried
5557   // first and the remaining operand becomes x.
5558   const bool MaskOnRHS = IsShiftedAllOnes(RHS);
5559   if (!MaskOnRHS && !IsShiftedAllOnes(LHS))
5560     return SDValue();
5561   const SDValue X = MaskOnRHS ? LHS : RHS;
5562
5563   SDLoc DL(N);
5564   EVT VT = N->getValueType(0);
5565
5566   //     tmp = x   'opposite logical shift' y
5567   SDValue Tmp = DAG.getNode(UndoShiftOpc, DL, VT, X, ShAmt);
5568   //     ret = tmp 'logical shift' y
5569   return DAG.getNode(MaskShiftOpc, DL, VT, Tmp, ShAmt);
5570 }
5571
5572 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5573 /// For a target with a bit test, this is expected to become test + set and save
5574 /// at least 1 instruction.
5575 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5576   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5577   // Without a legal result type the rewrite is probably not worthwhile.
5578   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5579   EVT VT = And->getValueType(0);
5580   if (!TLI.isTypeLegal(VT))
5581     return SDValue();
5582
5583   // Match 'and (not ...), 1'; the single-use 'not' may sit under an any_extend.
5584   // TODO: Should we favor test+set even without the 'not' op?
5585   SDValue NotOp = And->getOperand(0);
5586   if (NotOp.getOpcode() == ISD::ANY_EXTEND)
5587     NotOp = NotOp.getOperand(0);
5588   const bool IsNotAnd1 = isBitwiseNot(NotOp) && NotOp.hasOneUse() &&
5589                          isOneConstant(And->getOperand(1));
5590   if (!IsNotAnd1)
5591     return SDValue();
5592
5593   // Look through an optional truncation. The source operand may have another
5594   // type than the 'and', which is fine as only the low bit survives the mask.
5595   SDValue Shift = NotOp.getOperand(0);
5596   if (Shift.getOpcode() == ISD::TRUNCATE)
5597     Shift = Shift.getOperand(0);
5598
5599   // What is left must be a single-use shift-right by a constant.
5600   const bool IsConstSrl = Shift.getOpcode() == ISD::SRL && Shift.hasOneUse() &&
5601                           isa<ConstantSDNode>(Shift.getOperand(1));
5602   if (!IsConstSrl)
5603     return SDValue();
5604
5605   // The casts looked through above may have made the transform invalid.
5606   // TODO: If the source type is wider than the result type, do the mask and
5607   //       compare in the source type.
5608   const APInt &BitIdx = Shift.getConstantOperandAPInt(1);
5609   unsigned NumBits = VT.getSizeInBits();
5610   if (BitIdx.uge(NumBits))
5611     return SDValue();
5612
5613   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5614   SDLoc DL(And);
5615   SDValue X = DAG.getZExtOrTrunc(Shift.getOperand(0), DL, VT);
5616   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5617   SDValue BitMask = DAG.getConstant(
5618       APInt::getOneBitSet(NumBits, BitIdx.getZExtValue()), DL, VT);
5619   SDValue Masked = DAG.getNode(ISD::AND, DL, VT, X, BitMask);
5620   SDValue IsClear = DAG.getSetCC(DL, CCVT, Masked, DAG.getConstant(0, DL, VT),
5621                                  ISD::SETEQ);
5622   return DAG.getZExtOrTrunc(IsClear, DL, VT);
5623 }
5624 /// Apply the AND folds below to \p N; an empty SDValue means no fold applied.
5625 SDValue DAGCombiner::visitAND(SDNode *N) {
5626   SDValue N0 = N->getOperand(0);
5627   SDValue N1 = N->getOperand(1);
5628   EVT VT = N1.getValueType();
5629
5630   // x & x --> x
5631   if (N0 == N1)
5632     return N0;
5633
5634   // fold vector ops
5635   if (VT.isVector()) {
5636     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5637       return FoldedVOp;
5638
5639     // fold (and x, 0) -> 0, vector edition
5640     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
5641       // do not return N0, because undef node may exist in N0
5642       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5643                              SDLoc(N), N0.getValueType());
5644     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5645       // do not return N1, because undef node may exist in N1
5646       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5647                              SDLoc(N), N1.getValueType());
5648
5649     // fold (and x, -1) -> x, vector edition
5650     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5651       return N1;
5652     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5653       return N0;
5654
5655     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5656     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5657     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5658     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5659         N0.hasOneUse() && N1.hasOneUse()) {
5660       EVT LoadVT = MLoad->getMemoryVT();
5661       EVT ExtVT = VT;
5662       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5663         // For this AND to be a zero extension of the masked load the elements
5664         // of the BuildVec must mask the bottom bits of the extended element
5665         // type
5666         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5667           uint64_t ElementSize =
5668               LoadVT.getVectorElementType().getScalarSizeInBits();
5669           if (Splat->getAPIntValue().isMask(ElementSize)) {
5670             return DAG.getMaskedLoad(
5671                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5672                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5673                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5674                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5675           }
5676         }
5677       }
5678     }
5679   }
5680
5681   // fold (and c1, c2) -> c1&c2
5682   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5683   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5684     return C;
5685
5686   // canonicalize constant to RHS
5687   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5688       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5689     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5690
5691   // fold (and x, -1) -> x
5692   if (isAllOnesConstant(N1))
5693     return N0;
5694
5695   // if (and x, c) is known to be zero, return 0
5696   unsigned BitWidth = VT.getScalarSizeInBits();
5697   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5698                                    APInt::getAllOnesValue(BitWidth)))
5699     return DAG.getConstant(0, SDLoc(N), VT);
5700
5701   if (SDValue NewSel = foldBinOpIntoSelect(N))
5702     return NewSel;
5703
5704   // reassociate and
5705   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5706     return RAND;
5707
5708   // Try to convert a constant mask AND into a shuffle clear mask.
5709   if (VT.isVector())
5710     if (SDValue Shuffle = XformToShuffleWithZero(N))
5711       return Shuffle;
5712
5713   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5714     return Combined;
5715
5716   // fold (and (or x, C), D) -> D if (C & D) == D
5717   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5718     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5719   };
5720   if (N0.getOpcode() == ISD::OR &&
5721       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5722     return N1;
5723   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5724   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5725     SDValue N0Op0 = N0.getOperand(0);
5726     APInt Mask = ~N1C->getAPIntValue();
5727     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5728     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5729       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5730                                  N0.getValueType(), N0Op0);
5731
5732       // Replace uses of the AND with uses of the Zero extend node.
5733       CombineTo(N, Zext);
5734
5735       // We actually want to replace all uses of the any_extend with the
5736       // zero_extend, to avoid duplicating things.  This will later cause this
5737       // AND to be folded.
5738       CombineTo(N0.getNode(), Zext);
5739       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5740     }
5741   }
5742
5743   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5744   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5745   // already be zero by virtue of the width of the base type of the load.
5746   //
5747   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5748   // more cases.
5749   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5750        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5751        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5752        N0.getOperand(0).getResNo() == 0) ||
5753       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5754     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5755                                          N0 : N0.getOperand(0) );
5756
5757     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5758     // This can be a pure constant or a vector splat, in which case we treat the
5759     // vector as a scalar and use the splat value.
5760     APInt Constant = APInt::getNullValue(1);
5761     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5762       Constant = C->getAPIntValue();
5763     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5764       APInt SplatValue, SplatUndef;
5765       unsigned SplatBitSize;
5766       bool HasAnyUndefs;
5767       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5768                                              SplatBitSize, HasAnyUndefs);
5769       if (IsSplat) {
5770         // Undef bits can contribute to a possible optimisation if set, so
5771         // set them.
5772         SplatValue |= SplatUndef;
5773
5774         // The splat value may be something like "0x00FFFFFF", which means 0 for
5775         // the first vector value and FF for the rest, repeating. We need a mask
5776         // that will apply equally to all members of the vector, so AND all the
5777         // lanes of the constant together.
5778         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5779
5780         // If the splat value has been compressed to a bitlength lower
5781         // than the size of the vector lane, we need to re-expand it to
5782         // the lane size.
5783         if (EltBitWidth > SplatBitSize)
5784           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5785                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5786             SplatValue |= SplatValue.shl(SplatBitSize);
5787
5788         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5789         // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5790         if ((SplatBitSize % EltBitWidth) == 0) {
5791           Constant = APInt::getAllOnesValue(EltBitWidth);
5792           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5793             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5794         }
5795       }
5796     }
5797
5798     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5799     // actually legal and isn't going to get expanded, else this is a false
5800     // optimisation.
5801     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5802                                                     Load->getValueType(0),
5803                                                     Load->getMemoryVT());
5804
5805     // Resize the constant to the same size as the original memory access before
5806     // extension. If it is still the AllOnesValue then this AND is completely
5807     // unneeded.
5808     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5809
5810     bool B;
5811     switch (Load->getExtensionType()) {
5812     default: B = false; break;
5813     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5814     case ISD::ZEXTLOAD:
5815     case ISD::NON_EXTLOAD: B = true; break;
5816     }
5817
5818     if (B && Constant.isAllOnesValue()) {
5819       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5820       // preserve semantics once we get rid of the AND.
5821       SDValue NewLoad(Load, 0);
5822
5823       // Fold the AND away. NewLoad may get replaced immediately.
5824       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5825
5826       if (Load->getExtensionType() == ISD::EXTLOAD) {
5827         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5828                               Load->getValueType(0), SDLoc(Load),
5829                               Load->getChain(), Load->getBasePtr(),
5830                               Load->getOffset(), Load->getMemoryVT(),
5831                               Load->getMemOperand());
5832         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5833         if (Load->getNumValues() == 3) {
5834           // PRE/POST_INC loads have 3 values.
5835           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5836                            NewLoad.getValue(2) };
5837           CombineTo(Load, To, 3, true);
5838         } else {
5839           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5840         }
5841       }
5842
5843       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5844     }
5845   }
5846
5847   // fold (and (masked_gather x)) -> (zext_masked_gather x)
5848   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5849     EVT MemVT = GN0->getMemoryVT();
5850     EVT ScalarVT = MemVT.getScalarType();
5851
5852     if (SDValue(GN0, 0).hasOneUse() &&
5853         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
5854         TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
5855       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
5856                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
5857
5858       SDValue ZExtLoad = DAG.getMaskedGather(
5859           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5860           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5861
5862       CombineTo(N, ZExtLoad);
5863       AddToWorklist(ZExtLoad.getNode());
5864       // Avoid recheck of N.
5865       return SDValue(N, 0);
5866     }
5867   }
5868
5869   // fold (and (load x), 255) -> (zextload x, i8)
5870   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5871   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5872   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5873                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5874                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5875     if (SDValue Res = ReduceLoadWidth(N)) {
5876       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5877         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5878       AddToWorklist(N);
5879       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5880       return SDValue(N, 0);
5881     }
5882   }
5883
5884   if (LegalTypes) {
5885     // Attempt to propagate the AND back up to the leaves which, if they're
5886     // loads, can be combined to narrow loads and the AND node can be removed.
5887     // Perform after legalization so that extend nodes will already be
5888     // combined into the loads.
5889     if (BackwardsPropagateMask(N))
5890       return SDValue(N, 0);
5891   }
5892
5893   if (SDValue Combined = visitANDLike(N0, N1, N))
5894     return Combined;
5895
5896   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5897   if (N0.getOpcode() == N1.getOpcode())
5898     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5899       return V;
5900
5901   // Masking the negated extension of a boolean is just the zero-extended
5902   // boolean:
5903   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5904   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5905   //
5906   // Note: the SimplifyDemandedBits fold below can make an information-losing
5907   // transform, and then we have no way to find this better fold.
5908   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5909     if (isNullOrNullSplat(N0.getOperand(0))) {
5910       SDValue SubRHS = N0.getOperand(1);
5911       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5912           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5913         return SubRHS;
5914       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5915           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5916         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5917     }
5918   }
5919
5920   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5921   // fold (and (sra)) -> (and (srl)) when possible.
5922   if (SimplifyDemandedBits(SDValue(N, 0)))
5923     return SDValue(N, 0);
5924
5925   // fold (zext_inreg (extload x)) -> (zextload x)
5926   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5927   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5928       (ISD::isEXTLoad(N0.getNode()) ||
5929        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5930     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5931     EVT MemVT = LN0->getMemoryVT();
5932     // If we zero all the possible extended bits, then we can turn this into
5933     // a zextload if we are running before legalize or the operation is legal.
5934     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5935     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5936     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5937     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5938         ((!LegalOperations && LN0->isSimple()) ||
5939          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5940       SDValue ExtLoad =
5941           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5942                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5943       AddToWorklist(N);
5944       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5945       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5946     }
5947   }
5948
5949   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5950   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5951     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5952                                            N0.getOperand(1), false))
5953       return BSwap;
5954   }
5955
5956   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5957     return Shifts;
5958
5959   if (TLI.hasBitTest(N0, N1))
5960     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5961       return V;
5962
5963   // Recognize the following pattern:
5964   //
5965   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5966   //
5967   // where bitmask is a constant whose set bits are exactly the low bits that
5968   // make up NarrowVT, i.e. it clears every bit added by the extension.
5969   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5970     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5971       return false;
5972
5973     auto *C = dyn_cast<ConstantSDNode>(RHS);
5974     if (!C)
5975       return false;
5976
5977     if (!C->getAPIntValue().isMask(
5978             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5979       return false;
5980
5981     return true;
5982   };
5983
5984   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5985   if (IsAndZeroExtMask(N0, N1))
5986     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5987
5988   return SDValue();
5989 }
5990
5991 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5992 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5993                                         bool DemandHighBits) {
5994   if (!LegalOperations)
5995     return SDValue();
5996
5997   EVT VT = N->getValueType(0);
5998   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5999     return SDValue();
6000   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6001     return SDValue();
6002
6003   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6004   bool LookPassAnd0 = false;
6005   bool LookPassAnd1 = false;
6006   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6007       std::swap(N0, N1);
6008   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6009       std::swap(N0, N1);
6010   if (N0.getOpcode() == ISD::AND) {
6011     if (!N0.getNode()->hasOneUse())
6012       return SDValue();
6013     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6014     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6015     // This is needed for X86.
6016     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6017                   N01C->getZExtValue() != 0xFFFF))
6018       return SDValue();
6019     N0 = N0.getOperand(0);
6020     LookPassAnd0 = true;
6021   }
6022
6023   if (N1.getOpcode() == ISD::AND) {
6024     if (!N1.getNode()->hasOneUse())
6025       return SDValue();
6026     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6027     if (!N11C || N11C->getZExtValue() != 0xFF)
6028       return SDValue();
6029     N1 = N1.getOperand(0);
6030     LookPassAnd1 = true;
6031   }
6032
6033   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6034     std::swap(N0, N1);
6035   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6036     return SDValue();
6037   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
6038     return SDValue();
6039
6040   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6041   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6042   if (!N01C || !N11C)
6043     return SDValue();
6044   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6045     return SDValue();
6046
6047   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6048   SDValue N00 = N0->getOperand(0);
6049   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6050     if (!N00.getNode()->hasOneUse())
6051       return SDValue();
6052     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6053     if (!N001C || N001C->getZExtValue() != 0xFF)
6054       return SDValue();
6055     N00 = N00.getOperand(0);
6056     LookPassAnd0 = true;
6057   }
6058
6059   SDValue N10 = N1->getOperand(0);
6060   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6061     if (!N10.getNode()->hasOneUse())
6062       return SDValue();
6063     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6064     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6065     // for X86.
6066     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6067                    N101C->getZExtValue() != 0xFFFF))
6068       return SDValue();
6069     N10 = N10.getOperand(0);
6070     LookPassAnd1 = true;
6071   }
6072
6073   if (N00 != N10)
6074     return SDValue();
6075
6076   // Make sure everything beyond the low halfword gets set to zero since the SRL
6077   // 16 will clear the top bits.
6078   unsigned OpSizeInBits = VT.getSizeInBits();
6079   if (DemandHighBits && OpSizeInBits > 16) {
6080     // If the left-shift isn't masked out then the only way this is a bswap is
6081     // if all bits beyond the low 8 are 0. In that case the entire pattern
6082     // reduces to a left shift anyway: leave it for other parts of the combiner.
6083     if (!LookPassAnd0)
6084       return SDValue();
6085
6086     // However, if the right shift isn't masked out then it might be because
6087     // it's not needed. See if we can spot that too.
6088     if (!LookPassAnd1 &&
6089         !DAG.MaskedValueIsZero(
6090             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6091       return SDValue();
6092   }
6093
6094   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6095   if (OpSizeInBits > 16) {
6096     SDLoc DL(N);
6097     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6098                       DAG.getConstant(OpSizeInBits - 16, DL,
6099                                       getShiftAmountTy(VT)));
6100   }
6101   return Res;
6102 }
6103
6104 /// Return true if the specified node is an element that makes up a 32-bit
6105 /// packed halfword byteswap.
6106 /// ((x & 0x000000ff) << 8) |
6107 /// ((x & 0x0000ff00) >> 8) |
6108 /// ((x & 0x00ff0000) << 8) |
6109 /// ((x & 0xff000000) >> 8)
6110 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6111   if (!N.getNode()->hasOneUse())
6112     return false;
6113
6114   unsigned Opc = N.getOpcode();
6115   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6116     return false;
6117
6118   SDValue N0 = N.getOperand(0);
6119   unsigned Opc0 = N0.getOpcode();
6120   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6121     return false;
6122
6123   ConstantSDNode *N1C = nullptr;
6124   // SHL or SRL: look upstream for AND mask operand
6125   if (Opc == ISD::AND)
6126     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6127   else if (Opc0 == ISD::AND)
6128     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6129   if (!N1C)
6130     return false;
6131
6132   unsigned MaskByteOffset;
6133   switch (N1C->getZExtValue()) {
6134   default:
6135     return false;
6136   case 0xFF:       MaskByteOffset = 0; break;
6137   case 0xFF00:     MaskByteOffset = 1; break;
6138   case 0xFFFF:
6139     // In case demanded bits didn't clear the bits that will be shifted out.
6140     // This is needed for X86.
6141     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6142       MaskByteOffset = 1;
6143       break;
6144     }
6145     return false;
6146   case 0xFF0000:   MaskByteOffset = 2; break;
6147   case 0xFF000000: MaskByteOffset = 3; break;
6148   }
6149
6150   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6151   if (Opc == ISD::AND) {
6152     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6153       // (x >> 8) & 0xff
6154       // (x >> 8) & 0xff0000
6155       if (Opc0 != ISD::SRL)
6156         return false;
6157       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6158       if (!C || C->getZExtValue() != 8)
6159         return false;
6160     } else {
6161       // (x << 8) & 0xff00
6162       // (x << 8) & 0xff000000
6163       if (Opc0 != ISD::SHL)
6164         return false;
6165       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6166       if (!C || C->getZExtValue() != 8)
6167         return false;
6168     }
6169   } else if (Opc == ISD::SHL) {
6170     // (x & 0xff) << 8
6171     // (x & 0xff0000) << 8
6172     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6173       return false;
6174     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6175     if (!C || C->getZExtValue() != 8)
6176       return false;
6177   } else { // Opc == ISD::SRL
6178     // (x & 0xff00) >> 8
6179     // (x & 0xff000000) >> 8
6180     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6181       return false;
6182     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6183     if (!C || C->getZExtValue() != 8)
6184       return false;
6185   }
6186
6187   if (Parts[MaskByteOffset])
6188     return false;
6189
6190   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6191   return true;
6192 }
6193
6194 // Match 2 elements of a packed halfword bswap.
6195 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6196   if (N.getOpcode() == ISD::OR)
6197     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6198            isBSwapHWordElement(N.getOperand(1), Parts);
6199
6200   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6201     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6202     if (!C || C->getAPIntValue() != 16)
6203       return false;
6204     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6205     return true;
6206   }
6207
6208   return false;
6209 }
6210
6211 // Match this pattern:
6212 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
6213 // And rewrite this to:
6214 //   (rotr (bswap A), 16)
6215 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6216                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6217                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6218   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6219          "MatchBSwapHWordOrAndAnd: expecting i32");
6220   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6221     return SDValue();
6222   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6223     return SDValue();
6224   // TODO: this is too restrictive; lifting this restriction requires more tests
6225   if (!N0->hasOneUse() || !N1->hasOneUse())
6226     return SDValue();
6227   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6228   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6229   if (!Mask0 || !Mask1)
6230     return SDValue();
6231   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6232       Mask1->getAPIntValue() != 0x00ff00ff)
6233     return SDValue();
6234   SDValue Shift0 = N0.getOperand(0);
6235   SDValue Shift1 = N1.getOperand(0);
6236   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6237     return SDValue();
6238   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6239   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6240   if (!ShiftAmt0 || !ShiftAmt1)
6241     return SDValue();
6242   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6243     return SDValue();
6244   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6245     return SDValue();
6246
6247   SDLoc DL(N);
6248   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6249   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6250   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6251 }
6252
6253 /// Match a 32-bit packed halfword bswap. That is
6254 /// ((x & 0x000000ff) << 8) |
6255 /// ((x & 0x0000ff00) >> 8) |
6256 /// ((x & 0x00ff0000) << 8) |
6257 /// ((x & 0xff000000) >> 8)
6258 /// => (rotl (bswap x), 16)
6259 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6260   if (!LegalOperations)
6261     return SDValue();
6262
6263   EVT VT = N->getValueType(0);
6264   if (VT != MVT::i32)
6265     return SDValue();
6266   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6267     return SDValue();
6268
6269   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
6270                                               getShiftAmountTy(VT)))
6271   return BSwap;
6272
6273   // Try again with commuted operands.
6274   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
6275                                               getShiftAmountTy(VT)))
6276   return BSwap;
6277
6278
6279   // Look for either
6280   // (or (bswaphpair), (bswaphpair))
6281   // (or (or (bswaphpair), (and)), (and))
6282   // (or (or (and), (bswaphpair)), (and))
6283   SDNode *Parts[4] = {};
6284
6285   if (isBSwapHWordPair(N0, Parts)) {
6286     // (or (or (and), (and)), (or (and), (and)))
6287     if (!isBSwapHWordPair(N1, Parts))
6288       return SDValue();
6289   } else if (N0.getOpcode() == ISD::OR) {
6290     // (or (or (or (and), (and)), (and)), (and))
6291     if (!isBSwapHWordElement(N1, Parts))
6292       return SDValue();
6293     SDValue N00 = N0.getOperand(0);
6294     SDValue N01 = N0.getOperand(1);
6295     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6296         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6297       return SDValue();
6298   } else
6299     return SDValue();
6300
6301   // Make sure the parts are all coming from the same node.
6302   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6303     return SDValue();
6304
6305   SDLoc DL(N);
6306   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6307                               SDValue(Parts[0], 0));
6308
6309   // Result of the bswap should be rotated by 16. If it's not legal, then
6310   // do  (x << 16) | (x >> 16).
6311   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6312   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6313     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6314   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6315     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6316   return DAG.getNode(ISD::OR, DL, VT,
6317                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6318                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6319 }
6320
6321 /// This contains all DAGCombine rules which reduce two values combined by
6322 /// an Or operation to a single value \see visitANDLike().
6323 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6324   EVT VT = N1.getValueType();
6325   SDLoc DL(N);
6326
6327   // fold (or x, undef) -> -1
6328   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6329     return DAG.getAllOnesConstant(DL, VT);
6330
6331   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6332     return V;
6333
6334   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6335   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6336       // Don't increase # computations.
6337       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6338     // We can only do this xform if we know that bits from X that are set in C2
6339     // but not in C1 are already zero.  Likewise for Y.
6340     if (const ConstantSDNode *N0O1C =
6341         getAsNonOpaqueConstant(N0.getOperand(1))) {
6342       if (const ConstantSDNode *N1O1C =
6343           getAsNonOpaqueConstant(N1.getOperand(1))) {
6344         // We can only do this xform if we know that bits from X that are set in
6345         // C2 but not in C1 are already zero.  Likewise for Y.
6346         const APInt &LHSMask = N0O1C->getAPIntValue();
6347         const APInt &RHSMask = N1O1C->getAPIntValue();
6348
6349         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6350             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6351           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6352                                   N0.getOperand(0), N1.getOperand(0));
6353           return DAG.getNode(ISD::AND, DL, VT, X,
6354                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6355         }
6356       }
6357     }
6358   }
6359
6360   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6361   if (N0.getOpcode() == ISD::AND &&
6362       N1.getOpcode() == ISD::AND &&
6363       N0.getOperand(0) == N1.getOperand(0) &&
6364       // Don't increase # computations.
6365       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6366     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6367                             N0.getOperand(1), N1.getOperand(1));
6368     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6369   }
6370
6371   return SDValue();
6372 }
6373
6374 /// OR combines for which the commuted variant will be tried as well.
6375 static SDValue visitORCommutative(
6376     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6377   EVT VT = N0.getValueType();
6378   if (N0.getOpcode() == ISD::AND) {
6379     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6380     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6381       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6382
6383     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6384     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6385       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6386   }
6387
6388   return SDValue();
6389 }
6390
6391 SDValue DAGCombiner::visitOR(SDNode *N) {
6392   SDValue N0 = N->getOperand(0);
6393   SDValue N1 = N->getOperand(1);
6394   EVT VT = N1.getValueType();
6395
6396   // x | x --> x
6397   if (N0 == N1)
6398     return N0;
6399
6400   // fold vector ops
6401   if (VT.isVector()) {
6402     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6403       return FoldedVOp;
6404
6405     // fold (or x, 0) -> x, vector edition
6406     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6407       return N1;
6408     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6409       return N0;
6410
6411     // fold (or x, -1) -> -1, vector edition
6412     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
6413       // do not return N0, because undef node may exist in N0
6414       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6415     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6416       // do not return N1, because undef node may exist in N1
6417       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6418
6419     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6420     // Do this only if the resulting shuffle is legal.
6421     if (isa<ShuffleVectorSDNode>(N0) &&
6422         isa<ShuffleVectorSDNode>(N1) &&
6423         // Avoid folding a node with illegal type.
6424         TLI.isTypeLegal(VT)) {
6425       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6426       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6427       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6428       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6429       // Ensure both shuffles have a zero input.
6430       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6431         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6432         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6433         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6434         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6435         bool CanFold = true;
6436         int NumElts = VT.getVectorNumElements();
6437         SmallVector<int, 4> Mask(NumElts);
6438
6439         for (int i = 0; i != NumElts; ++i) {
6440           int M0 = SV0->getMaskElt(i);
6441           int M1 = SV1->getMaskElt(i);
6442
6443           // Determine if either index is pointing to a zero vector.
6444           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6445           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6446
6447           // If one element is zero and the otherside is undef, keep undef.
6448           // This also handles the case that both are undef.
6449           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6450             Mask[i] = -1;
6451             continue;
6452           }
6453
6454           // Make sure only one of the elements is zero.
6455           if (M0Zero == M1Zero) {
6456             CanFold = false;
6457             break;
6458           }
6459
6460           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6461
6462           // We have a zero and non-zero element. If the non-zero came from
6463           // SV0 make the index a LHS index. If it came from SV1, make it
6464           // a RHS index. We need to mod by NumElts because we don't care
6465           // which operand it came from in the original shuffles.
6466           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6467         }
6468
6469         if (CanFold) {
6470           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6471           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6472
6473           SDValue LegalShuffle =
6474               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6475                                           Mask, DAG);
6476           if (LegalShuffle)
6477             return LegalShuffle;
6478         }
6479       }
6480     }
6481   }
6482
6483   // fold (or c1, c2) -> c1|c2
6484   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6485   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6486     return C;
6487
6488   // canonicalize constant to RHS
6489   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6490      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6491     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6492
6493   // fold (or x, 0) -> x
6494   if (isNullConstant(N1))
6495     return N0;
6496
6497   // fold (or x, -1) -> -1
6498   if (isAllOnesConstant(N1))
6499     return N1;
6500
6501   if (SDValue NewSel = foldBinOpIntoSelect(N))
6502     return NewSel;
6503
6504   // fold (or x, c) -> c iff (x & ~c) == 0
6505   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6506     return N1;
6507
6508   if (SDValue Combined = visitORLike(N0, N1, N))
6509     return Combined;
6510
6511   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6512     return Combined;
6513
6514   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6515   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6516     return BSwap;
6517   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6518     return BSwap;
6519
6520   // reassociate or
6521   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6522     return ROR;
6523
6524   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6525   // iff (c1 & c2) != 0 or c1/c2 are undef.
6526   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6527     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6528   };
6529   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6530       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6531     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6532                                                  {N1, N0.getOperand(1)})) {
6533       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6534       AddToWorklist(IOR.getNode());
6535       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6536     }
6537   }
6538
6539   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6540     return Combined;
6541   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6542     return Combined;
6543
6544   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6545   if (N0.getOpcode() == N1.getOpcode())
6546     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6547       return V;
6548
6549   // See if this is some rotate idiom.
6550   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6551     return Rot;
6552
6553   if (SDValue Load = MatchLoadCombine(N))
6554     return Load;
6555
6556   // Simplify the operands using demanded-bits information.
6557   if (SimplifyDemandedBits(SDValue(N, 0)))
6558     return SDValue(N, 0);
6559
6560   // If OR can be rewritten into ADD, try combines based on ADD.
6561   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6562       DAG.haveNoCommonBitsSet(N0, N1))
6563     if (SDValue Combined = visitADDLike(N))
6564       return Combined;
6565
6566   return SDValue();
6567 }
6568
6569 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6570   if (Op.getOpcode() == ISD::AND &&
6571       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6572     Mask = Op.getOperand(1);
6573     return Op.getOperand(0);
6574   }
6575   return Op;
6576 }
6577
6578 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6579 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6580                             SDValue &Mask) {
6581   Op = stripConstantMask(DAG, Op, Mask);
6582   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6583     Shift = Op;
6584     return true;
6585   }
6586   return false;
6587 }
6588
6589 /// Helper function for visitOR to extract the needed side of a rotate idiom
6590 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6591 /// InstCombine merged some outside op with one of the shifts from
6592 /// the rotate pattern.
6593 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6594 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6595 /// patterns:
6596 ///
6597 ///   (or (add v v) (shrl v bitwidth-1)):
6598 ///     expands (add v v) -> (shl v 1)
6599 ///
6600 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6601 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6602 ///
6603 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6604 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6605 ///
6606 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6607 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6608 ///
6609 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6610 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6611 ///
6612 /// Such that in all cases, c3+c2==bitwidth(op v c1).
6613 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6614                                      SDValue ExtractFrom, SDValue &Mask,
6615                                      const SDLoc &DL) {
6616   assert(OppShift && ExtractFrom && "Empty SDValue");
6617   assert(
6618       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6619       "Existing shift must be valid as a rotate half");
6620
6621   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6622
6623   // Value and Type of the shift.
6624   SDValue OppShiftLHS = OppShift.getOperand(0);
6625   EVT ShiftedVT = OppShiftLHS.getValueType();
6626
6627   // Amount of the existing shift.
6628   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6629
6630   // (add v v) -> (shl v 1)
6631   // TODO: Should this be a general DAG canonicalization?
6632   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6633       ExtractFrom.getOpcode() == ISD::ADD &&
6634       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6635       ExtractFrom.getOperand(0) == OppShiftLHS &&
6636       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6637     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6638                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6639
6640   // Preconditions:
6641   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6642   //
6643   // Find opcode of the needed shift to be extracted from (op0 v c0).
6644   unsigned Opcode = ISD::DELETED_NODE;
6645   bool IsMulOrDiv = false;
6646   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6647   // opcode or its arithmetic (mul or udiv) variant.
6648   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6649     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6650     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6651       return false;
6652     Opcode = NeededShift;
6653     return true;
6654   };
6655   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6656   // that the needed shift can be extracted from.
6657   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6658       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6659     return SDValue();
6660
6661   // op0 must be the same opcode on both sides, have the same LHS argument,
6662   // and produce the same value type.
6663   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6664       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6665       ShiftedVT != ExtractFrom.getValueType())
6666     return SDValue();
6667
6668   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6669   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6670   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6671   ConstantSDNode *ExtractFromCst =
6672       isConstOrConstSplat(ExtractFrom.getOperand(1));
6673   // TODO: We should be able to handle non-uniform constant vectors for these values
6674   // Check that we have constant values.
6675   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6676       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6677       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6678     return SDValue();
6679
6680   // Compute the shift amount we need to extract to complete the rotate.
6681   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6682   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6683     return SDValue();
6684   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6685   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6686   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6687   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6688   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6689
6690   // Now try extract the needed shift from the ExtractFrom op and see if the
6691   // result matches up with the existing shift's LHS op.
6692   if (IsMulOrDiv) {
6693     // Op to extract from is a mul or udiv by a constant.
6694     // Check:
6695     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6696     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6697     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6698                                                  NeededShiftAmt.getZExtValue());
6699     APInt ResultAmt;
6700     APInt Rem;
6701     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6702     if (Rem != 0 || ResultAmt != OppLHSAmt)
6703       return SDValue();
6704   } else {
6705     // Op to extract from is a shift by a constant.
6706     // Check:
6707     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6708     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6709                                           ExtractFromAmt.getBitWidth()))
6710       return SDValue();
6711   }
6712
6713   // Return the expanded shift op that should allow a rotate to be formed.
6714   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6715   EVT ResVT = ExtractFrom.getValueType();
6716   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6717   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6718 }
6719
6720 // Return true if we can prove that, whenever Neg and Pos are both in the
6721 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6722 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6723 //
6724 //     (or (shift1 X, Neg), (shift2 X, Pos))
6725 //
6726 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6727 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6728 // to consider shift amounts with defined behavior.
6729 //
6730 // The IsRotate flag should be set when the LHS of both shifts is the same.
6731 // Otherwise if matching a general funnel shift, it should be clear.
6732 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6733                            SelectionDAG &DAG, bool IsRotate) {
6734   // If EltSize is a power of 2 then:
6735   //
6736   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6737   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6738   //
6739   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6740   // for the stronger condition:
6741   //
6742   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6743   //
6744   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6745   // we can just replace Neg with Neg' for the rest of the function.
6746   //
6747   // In other cases we check for the even stronger condition:
6748   //
6749   //     Neg == EltSize - Pos                                    [B]
6750   //
6751   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6752   // behavior if Pos == 0 (and consequently Neg == EltSize).
6753   //
6754   // We could actually use [A] whenever EltSize is a power of 2, but the
6755   // only extra cases that it would match are those uninteresting ones
6756   // where Neg and Pos are never in range at the same time.  E.g. for
6757   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6758   // as well as (sub 32, Pos), but:
6759   //
6760   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6761   //
6762   // always invokes undefined behavior for 32-bit X.
6763   //
6764   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6765   //
6766   // NOTE: We can only do this when matching an AND and not a general
6767   // funnel shift.
6768   unsigned MaskLoBits = 0;
6769   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6770     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6771       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6772       unsigned Bits = Log2_64(EltSize);
6773       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6774           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6775         Neg = Neg.getOperand(0);
6776         MaskLoBits = Bits;
6777       }
6778     }
6779   }
6780
6781   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6782   if (Neg.getOpcode() != ISD::SUB)
6783     return false;
6784   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6785   if (!NegC)
6786     return false;
6787   SDValue NegOp1 = Neg.getOperand(1);
6788
6789   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6790   // Pos'.  The truncation is redundant for the purpose of the equality.
6791   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6792     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6793       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6794       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6795           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6796            MaskLoBits))
6797         Pos = Pos.getOperand(0);
6798     }
6799   }
6800
6801   // The condition we need is now:
6802   //
6803   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6804   //
6805   // If NegOp1 == Pos then we need:
6806   //
6807   //              EltSize & Mask == NegC & Mask
6808   //
6809   // (because "x & Mask" is a truncation and distributes through subtraction).
6810   //
6811   // We also need to account for a potential truncation of NegOp1 if the amount
6812   // has already been legalized to a shift amount type.
6813   APInt Width;
6814   if ((Pos == NegOp1) ||
6815       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6816     Width = NegC->getAPIntValue();
6817
6818   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6819   // Then the condition we want to prove becomes:
6820   //
6821   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6822   //
6823   // which, again because "x & Mask" is a truncation, becomes:
6824   //
6825   //                NegC & Mask == (EltSize - PosC) & Mask
6826   //             EltSize & Mask == (NegC + PosC) & Mask
6827   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6828     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6829       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6830     else
6831       return false;
6832   } else
6833     return false;
6834
6835   // Now we just need to check that EltSize & Mask == Width & Mask.
6836   if (MaskLoBits)
6837     // EltSize & Mask is 0 since Mask is EltSize - 1.
6838     return Width.getLoBits(MaskLoBits) == 0;
6839   return Width == EltSize;
6840 }
6841
6842 // A subroutine of MatchRotate used once we have found an OR of two opposite
6843 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6844 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6845 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6846 // Neg with outer conversions stripped away.
6847 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6848                                        SDValue Neg, SDValue InnerPos,
6849                                        SDValue InnerNeg, unsigned PosOpcode,
6850                                        unsigned NegOpcode, const SDLoc &DL) {
6851   // fold (or (shl x, (*ext y)),
6852   //          (srl x, (*ext (sub 32, y)))) ->
6853   //   (rotl x, y) or (rotr x, (sub 32, y))
6854   //
6855   // fold (or (shl x, (*ext (sub 32, y))),
6856   //          (srl x, (*ext y))) ->
6857   //   (rotr x, y) or (rotl x, (sub 32, y))
6858   EVT VT = Shifted.getValueType();
6859   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6860                      /*IsRotate*/ true)) {
6861     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6862     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6863                        HasPos ? Pos : Neg);
6864   }
6865
6866   return SDValue();
6867 }
6868
6869 // A subroutine of MatchRotate used once we have found an OR of two opposite
6870 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
6871 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6872 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6873 // Neg with outer conversions stripped away.
6874 // TODO: Merge with MatchRotatePosNeg.
6875 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6876                                        SDValue Neg, SDValue InnerPos,
6877                                        SDValue InnerNeg, unsigned PosOpcode,
6878                                        unsigned NegOpcode, const SDLoc &DL) {
6879   EVT VT = N0.getValueType();
6880   unsigned EltBits = VT.getScalarSizeInBits();
6881
6882   // fold (or (shl x0, (*ext y)),
6883   //          (srl x1, (*ext (sub 32, y)))) ->
6884   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6885   //
6886   // fold (or (shl x0, (*ext (sub 32, y))),
6887   //          (srl x1, (*ext y))) ->
6888   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6889   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6890     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6891     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6892                        HasPos ? Pos : Neg);
6893   }
6894
6895   // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6896   // so for now just use the PosOpcode case if its legal.
6897   // TODO: When can we use the NegOpcode case?
6898   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6899     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6900       if (Op.getOpcode() != BinOpc)
6901         return false;
6902       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6903       return Cst && (Cst->getAPIntValue() == Imm);
6904     };
6905
6906     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6907     //   -> (fshl x0, x1, y)
6908     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6909         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6910         InnerPos == InnerNeg.getOperand(0) &&
6911         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6912       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6913     }
6914
6915     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6916     //   -> (fshr x0, x1, y)
6917     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6918         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6919         InnerNeg == InnerPos.getOperand(0) &&
6920         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6921       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6922     }
6923
6924     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6925     //   -> (fshr x0, x1, y)
6926     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6927     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6928         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6929         InnerNeg == InnerPos.getOperand(0) &&
6930         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6931       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6932     }
6933   }
6934
6935   return SDValue();
6936 }
6937
6938 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6939 // idioms for rotate, and if the target supports rotation instructions, generate
6940 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6941 // with different shifted sources.
6942 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6943   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6944   EVT VT = LHS.getValueType();
6945   if (!TLI.isTypeLegal(VT))
6946     return SDValue();
6947
6948   // The target must have at least one rotate/funnel flavor.
6949   bool HasROTL = hasOperation(ISD::ROTL, VT);
6950   bool HasROTR = hasOperation(ISD::ROTR, VT);
6951   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6952   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6953   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6954     return SDValue();
6955
6956   // Check for truncated rotate.
6957   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6958       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6959     assert(LHS.getValueType() == RHS.getValueType());
6960     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6961       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6962     }
6963   }
6964
6965   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6966   SDValue LHSShift;   // The shift.
6967   SDValue LHSMask;    // AND value if any.
6968   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6969
6970   SDValue RHSShift;   // The shift.
6971   SDValue RHSMask;    // AND value if any.
6972   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6973
6974   // If neither side matched a rotate half, bail
6975   if (!LHSShift && !RHSShift)
6976     return SDValue();
6977
6978   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6979   // side of the rotate, so try to handle that here. In all cases we need to
6980   // pass the matched shift from the opposite side to compute the opcode and
6981   // needed shift amount to extract.  We still want to do this if both sides
6982   // matched a rotate half because one half may be a potential overshift that
6983   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6984   // single one).
6985
6986   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6987   if (LHSShift)
6988     if (SDValue NewRHSShift =
6989             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6990       RHSShift = NewRHSShift;
6991   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6992   if (RHSShift)
6993     if (SDValue NewLHSShift =
6994             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6995       LHSShift = NewLHSShift;
6996
6997   // If a side is still missing, nothing else we can do.
6998   if (!RHSShift || !LHSShift)
6999     return SDValue();
7000
7001   // At this point we've matched or extracted a shift op on each side.
7002
7003   if (LHSShift.getOpcode() == RHSShift.getOpcode())
7004     return SDValue(); // Shifts must disagree.
7005
7006   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7007   if (!IsRotate && !(HasFSHL || HasFSHR))
7008     return SDValue(); // Requires funnel shift support.
7009
7010   // Canonicalize shl to left side in a shl/srl pair.
7011   if (RHSShift.getOpcode() == ISD::SHL) {
7012     std::swap(LHS, RHS);
7013     std::swap(LHSShift, RHSShift);
7014     std::swap(LHSMask, RHSMask);
7015   }
7016
7017   unsigned EltSizeInBits = VT.getScalarSizeInBits();
7018   SDValue LHSShiftArg = LHSShift.getOperand(0);
7019   SDValue LHSShiftAmt = LHSShift.getOperand(1);
7020   SDValue RHSShiftArg = RHSShift.getOperand(0);
7021   SDValue RHSShiftAmt = RHSShift.getOperand(1);
7022
7023   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7024   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7025   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7026   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7027   // iff C1+C2 == EltSizeInBits
7028   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7029                                         ConstantSDNode *RHS) {
7030     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7031   };
7032   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7033     SDValue Res;
7034     if (IsRotate && (HasROTL || HasROTR))
7035       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7036                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
7037     else
7038       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7039                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
7040
7041     // If there is an AND of either shifted operand, apply it to the result.
7042     if (LHSMask.getNode() || RHSMask.getNode()) {
7043       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7044       SDValue Mask = AllOnes;
7045
7046       if (LHSMask.getNode()) {
7047         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7048         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7049                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7050       }
7051       if (RHSMask.getNode()) {
7052         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7053         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7054                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7055       }
7056
7057       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7058     }
7059
7060     return Res;
7061   }
7062
7063   // If there is a mask here, and we have a variable shift, we can't be sure
7064   // that we're masking out the right stuff.
7065   if (LHSMask.getNode() || RHSMask.getNode())
7066     return SDValue();
7067
7068   // If the shift amount is sign/zext/any-extended just peel it off.
7069   SDValue LExtOp0 = LHSShiftAmt;
7070   SDValue RExtOp0 = RHSShiftAmt;
7071   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7072        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7073        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7074        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7075       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7076        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7077        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7078        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7079     LExtOp0 = LHSShiftAmt.getOperand(0);
7080     RExtOp0 = RHSShiftAmt.getOperand(0);
7081   }
7082
7083   if (IsRotate && (HasROTL || HasROTR)) {
7084     SDValue TryL =
7085         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7086                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7087     if (TryL)
7088       return TryL;
7089
7090     SDValue TryR =
7091         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7092                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7093     if (TryR)
7094       return TryR;
7095   }
7096
7097   SDValue TryL =
7098       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7099                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7100   if (TryL)
7101     return TryL;
7102
7103   SDValue TryR =
7104       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7105                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7106   if (TryR)
7107     return TryR;
7108
7109   return SDValue();
7110 }
7111
7112 namespace {
7113
7114 /// Represents known origin of an individual byte in load combine pattern. The
7115 /// value of the byte is either constant zero or comes from memory.
7116 struct ByteProvider {
7117   // For constant zero providers Load is set to nullptr. For memory providers
7118   // Load represents the node which loads the byte from memory.
7119   // ByteOffset is the offset of the byte in the value produced by the load.
7120   LoadSDNode *Load = nullptr;
7121   unsigned ByteOffset = 0;
7122
7123   ByteProvider() = default;
7124
7125   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7126     return ByteProvider(Load, ByteOffset);
7127   }
7128
7129   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7130
7131   bool isConstantZero() const { return !Load; }
7132   bool isMemory() const { return Load; }
7133
7134   bool operator==(const ByteProvider &Other) const {
7135     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7136   }
7137
7138 private:
7139   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7140       : Load(Load), ByteOffset(ByteOffset) {}
7141 };
7142
7143 } // end anonymous namespace
7144
7145 /// Recursively traverses the expression calculating the origin of the requested
7146 /// byte of the given value. Returns None if the provider can't be calculated.
7147 ///
7148 /// For all the values except the root of the expression verifies that the value
7149 /// has exactly one use and if it's not true return None. This way if the origin
7150 /// of the byte is returned it's guaranteed that the values which contribute to
7151 /// the byte are not used outside of this expression.
7152 ///
7153 /// Because the parts of the expression are not allowed to have more than one
7154 /// use this function iterates over trees, not DAGs. So it never visits the same
7155 /// node more than once.
7156 static const Optional<ByteProvider>
7157 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7158                       bool Root = false) {
7159   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
7160   if (Depth == 10)
7161     return None;
7162
7163   if (!Root && !Op.hasOneUse())
7164     return None;
7165
7166   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7167   unsigned BitWidth = Op.getValueSizeInBits();
7168   if (BitWidth % 8 != 0)
7169     return None;
7170   unsigned ByteWidth = BitWidth / 8;
7171   assert(Index < ByteWidth && "invalid index requested");
7172   (void) ByteWidth;
7173
7174   switch (Op.getOpcode()) {
7175   case ISD::OR: {
7176     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7177     if (!LHS)
7178       return None;
7179     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7180     if (!RHS)
7181       return None;
7182
7183     if (LHS->isConstantZero())
7184       return RHS;
7185     if (RHS->isConstantZero())
7186       return LHS;
7187     return None;
7188   }
7189   case ISD::SHL: {
7190     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7191     if (!ShiftOp)
7192       return None;
7193
7194     uint64_t BitShift = ShiftOp->getZExtValue();
7195     if (BitShift % 8 != 0)
7196       return None;
7197     uint64_t ByteShift = BitShift / 8;
7198
7199     return Index < ByteShift
7200                ? ByteProvider::getConstantZero()
7201                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7202                                        Depth + 1);
7203   }
7204   case ISD::ANY_EXTEND:
7205   case ISD::SIGN_EXTEND:
7206   case ISD::ZERO_EXTEND: {
7207     SDValue NarrowOp = Op->getOperand(0);
7208     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7209     if (NarrowBitWidth % 8 != 0)
7210       return None;
7211     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7212
7213     if (Index >= NarrowByteWidth)
7214       return Op.getOpcode() == ISD::ZERO_EXTEND
7215                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7216                  : None;
7217     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7218   }
7219   case ISD::BSWAP:
7220     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7221                                  Depth + 1);
7222   case ISD::LOAD: {
7223     auto L = cast<LoadSDNode>(Op.getNode());
7224     if (!L->isSimple() || L->isIndexed())
7225       return None;
7226
7227     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7228     if (NarrowBitWidth % 8 != 0)
7229       return None;
7230     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7231
7232     if (Index >= NarrowByteWidth)
7233       return L->getExtensionType() == ISD::ZEXTLOAD
7234                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7235                  : None;
7236     return ByteProvider::getMemory(L, Index);
7237   }
7238   }
7239
7240   return None;
7241 }
7242
// Expected relative byte offset of element i in a little endian layout of BW
// elements: the index itself.  BW is unused and only mirrors the signature of
// bigEndianByteAt.
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW;
  const unsigned Position = i;
  return Position;
}
7246
// Expected relative byte offset of element i in a big endian layout of BW
// elements: the index mirrored within [0, BW).
static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastIndex = BW - 1;
  return LastIndex - i;
}
7250
7251 // Check if the bytes offsets we are looking at match with either big or
7252 // little endian value loaded. Return true for big endian, false for little
7253 // endian, and None if match failed.
7254 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7255                                   int64_t FirstOffset) {
7256   // The endian can be decided only when it is 2 bytes at least.
7257   unsigned Width = ByteOffsets.size();
7258   if (Width < 2)
7259     return None;
7260
7261   bool BigEndian = true, LittleEndian = true;
7262   for (unsigned i = 0; i < Width; i++) {
7263     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7264     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7265     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7266     if (!BigEndian && !LittleEndian)
7267       return None;
7268   }
7269
7270   assert((BigEndian != LittleEndian) && "It should be either big endian or"
7271                                         "little endian");
7272   return BigEndian;
7273 }
7274
7275 static SDValue stripTruncAndExt(SDValue Value) {
7276   switch (Value.getOpcode()) {
7277   case ISD::TRUNCATE:
7278   case ISD::ZERO_EXTEND:
7279   case ISD::SIGN_EXTEND:
7280   case ISD::ANY_EXTEND:
7281     return stripTruncAndExt(Value.getOperand(0));
7282   }
7283   return Value;
7284 }
7285
7286 /// Match a pattern where a wide type scalar value is stored by several narrow
7287 /// stores. Fold it into a single store or a BSWAP and a store if the targets
7288 /// supports it.
7289 ///
7290 /// Assuming little endian target:
7291 ///  i8 *p = ...
7292 ///  i32 val = ...
7293 ///  p[0] = (val >> 0) & 0xFF;
7294 ///  p[1] = (val >> 8) & 0xFF;
7295 ///  p[2] = (val >> 16) & 0xFF;
7296 ///  p[3] = (val >> 24) & 0xFF;
7297 /// =>
7298 ///  *((i32)p) = val;
7299 ///
7300 ///  i8 *p = ...
7301 ///  i32 val = ...
7302 ///  p[0] = (val >> 24) & 0xFF;
7303 ///  p[1] = (val >> 16) & 0xFF;
7304 ///  p[2] = (val >> 8) & 0xFF;
7305 ///  p[3] = (val >> 0) & 0xFF;
7306 /// =>
7307 ///  *((i32)p) = BSWAP(val);
7308 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7309   // The matching looks for "store (trunc x)" patterns that appear early but are
7310   // likely to be replaced by truncating store nodes during combining.
7311   // TODO: If there is evidence that running this later would help, this
7312   //       limitation could be removed. Legality checks may need to be added
7313   //       for the created store and optional bswap/rotate.
7314   if (LegalOperations)
7315     return SDValue();
7316
7317   // We only handle merging simple stores of 1-4 bytes.
7318   // TODO: Allow unordered atomics when wider type is legal (see D66309)
7319   EVT MemVT = N->getMemoryVT();
7320   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7321       !N->isSimple() || N->isIndexed())
7322     return SDValue();
7323
7324   // Collect all of the stores in the chain.
7325   SDValue Chain = N->getChain();
7326   SmallVector<StoreSDNode *, 8> Stores = {N};
7327   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7328     // All stores must be the same size to ensure that we are writing all of the
7329     // bytes in the wide value.
7330     // TODO: We could allow multiple sizes by tracking each stored byte.
7331     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7332         Store->isIndexed())
7333       return SDValue();
7334     Stores.push_back(Store);
7335     Chain = Store->getChain();
7336   }
7337   // There is no reason to continue if we do not have at least a pair of stores.
7338   if (Stores.size() < 2)
7339     return SDValue();
7340
7341   // Handle simple types only.
7342   LLVMContext &Context = *DAG.getContext();
7343   unsigned NumStores = Stores.size();
7344   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7345   unsigned WideNumBits = NumStores * NarrowNumBits;
7346   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7347   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7348     return SDValue();
7349
7350   // Check if all bytes of the source value that we are looking at are stored
7351   // to the same base address. Collect offsets from Base address into OffsetMap.
7352   SDValue SourceValue;
7353   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7354   int64_t FirstOffset = INT64_MAX;
7355   StoreSDNode *FirstStore = nullptr;
7356   Optional<BaseIndexOffset> Base;
7357   for (auto Store : Stores) {
7358     // All the stores store different parts of the CombinedValue. A truncate is
7359     // required to get the partial value.
7360     SDValue Trunc = Store->getValue();
7361     if (Trunc.getOpcode() != ISD::TRUNCATE)
7362       return SDValue();
7363     // Other than the first/last part, a shift operation is required to get the
7364     // offset.
7365     int64_t Offset = 0;
7366     SDValue WideVal = Trunc.getOperand(0);
7367     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7368         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7369       // The shift amount must be a constant multiple of the narrow type.
7370       // It is translated to the offset address in the wide source value "y".
7371       //
7372       // x = srl y, ShiftAmtC
7373       // i8 z = trunc x
7374       // store z, ...
7375       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7376       if (ShiftAmtC % NarrowNumBits != 0)
7377         return SDValue();
7378
7379       Offset = ShiftAmtC / NarrowNumBits;
7380       WideVal = WideVal.getOperand(0);
7381     }
7382
7383     // Stores must share the same source value with different offsets.
7384     // Truncate and extends should be stripped to get the single source value.
7385     if (!SourceValue)
7386       SourceValue = WideVal;
7387     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7388       return SDValue();
7389     else if (SourceValue.getValueType() != WideVT) {
7390       if (WideVal.getValueType() == WideVT ||
7391           WideVal.getScalarValueSizeInBits() >
7392               SourceValue.getScalarValueSizeInBits())
7393         SourceValue = WideVal;
7394       // Give up if the source value type is smaller than the store size.
7395       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7396         return SDValue();
7397     }
7398
7399     // Stores must share the same base address.
7400     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7401     int64_t ByteOffsetFromBase = 0;
7402     if (!Base)
7403       Base = Ptr;
7404     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7405       return SDValue();
7406
7407     // Remember the first store.
7408     if (ByteOffsetFromBase < FirstOffset) {
7409       FirstStore = Store;
7410       FirstOffset = ByteOffsetFromBase;
7411     }
7412     // Map the offset in the store and the offset in the combined value, and
7413     // early return if it has been set before.
7414     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7415       return SDValue();
7416     OffsetMap[Offset] = ByteOffsetFromBase;
7417   }
7418
7419   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7420   assert(FirstStore && "First store must be set");
7421
7422   // Check that a store of the wide type is both allowed and fast on the target
7423   const DataLayout &Layout = DAG.getDataLayout();
7424   bool Fast = false;
7425   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7426                                         *FirstStore->getMemOperand(), &Fast);
7427   if (!Allowed || !Fast)
7428     return SDValue();
7429
7430   // Check if the pieces of the value are going to the expected places in memory
7431   // to merge the stores.
7432   auto checkOffsets = [&](bool MatchLittleEndian) {
7433     if (MatchLittleEndian) {
7434       for (unsigned i = 0; i != NumStores; ++i)
7435         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7436           return false;
7437     } else { // MatchBigEndian by reversing loop counter.
7438       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7439         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7440           return false;
7441     }
7442     return true;
7443   };
7444
7445   // Check if the offsets line up for the native data layout of this target.
7446   bool NeedBswap = false;
7447   bool NeedRotate = false;
7448   if (!checkOffsets(Layout.isLittleEndian())) {
7449     // Special-case: check if byte offsets line up for the opposite endian.
7450     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7451       NeedBswap = true;
7452     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7453       NeedRotate = true;
7454     else
7455       return SDValue();
7456   }
7457
7458   SDLoc DL(N);
7459   if (WideVT != SourceValue.getValueType()) {
7460     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7461            "Unexpected store value to merge");
7462     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7463   }
7464
7465   // Before legalize we can introduce illegal bswaps/rotates which will be later
7466   // converted to an explicit bswap sequence. This way we end up with a single
7467   // store and byte shuffling instead of several stores and byte shuffling.
7468   if (NeedBswap) {
7469     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7470   } else if (NeedRotate) {
7471     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7472     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7473     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7474   }
7475
7476   SDValue NewStore =
7477       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7478                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7479
7480   // Rely on other DAG combine rules to remove the other individual stores.
7481   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7482   return NewStore;
7483 }
7484
7485 /// Match a pattern where a wide type scalar value is loaded by several narrow
7486 /// loads and combined by shifts and ors. Fold it into a single load or a load
7487 /// and a BSWAP if the targets supports it.
7488 ///
7489 /// Assuming little endian target:
7490 ///  i8 *a = ...
7491 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7492 /// =>
7493 ///  i32 val = *((i32)a)
7494 ///
7495 ///  i8 *a = ...
7496 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7497 /// =>
7498 ///  i32 val = BSWAP(*((i32)a))
7499 ///
7500 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7501 /// interact well with the worklist mechanism. When a part of the pattern is
7502 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7503 /// but the root node of the pattern which triggers the load combine is not
7504 /// necessarily a direct user of the changed node. For example, once the address
7505 /// of t28 load is reassociated load combine won't be triggered:
7506 ///             t25: i32 = add t4, Constant:i32<2>
7507 ///           t26: i64 = sign_extend t25
7508 ///        t27: i64 = add t2, t26
7509 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7510 ///     t29: i32 = zero_extend t28
7511 ///   t32: i32 = shl t29, Constant:i8<8>
7512 /// t33: i32 = or t23, t32
7513 /// As a possible fix visitLoad can check if the load can be a part of a load
7514 /// combine pattern and add corresponding OR roots to the worklist.
7515 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7516   assert(N->getOpcode() == ISD::OR &&
7517          "Can only match load combining against OR nodes");
7518
7519   // Handles simple types only
7520   EVT VT = N->getValueType(0);
7521   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7522     return SDValue();
7523   unsigned ByteWidth = VT.getSizeInBits() / 8;
7524
7525   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7526   auto MemoryByteOffset = [&] (ByteProvider P) {
7527     assert(P.isMemory() && "Must be a memory byte provider");
7528     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7529     assert(LoadBitWidth % 8 == 0 &&
7530            "can only analyze providers for individual bytes not bit");
7531     unsigned LoadByteWidth = LoadBitWidth / 8;
7532     return IsBigEndianTarget
7533             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7534             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7535   };
7536
7537   Optional<BaseIndexOffset> Base;
7538   SDValue Chain;
7539
7540   SmallPtrSet<LoadSDNode *, 8> Loads;
7541   Optional<ByteProvider> FirstByteProvider;
7542   int64_t FirstOffset = INT64_MAX;
7543
7544   // Check if all the bytes of the OR we are looking at are loaded from the same
7545   // base address. Collect bytes offsets from Base address in ByteOffsets.
7546   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7547   unsigned ZeroExtendedBytes = 0;
7548   for (int i = ByteWidth - 1; i >= 0; --i) {
7549     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7550     if (!P)
7551       return SDValue();
7552
7553     if (P->isConstantZero()) {
7554       // It's OK for the N most significant bytes to be 0, we can just
7555       // zero-extend the load.
7556       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7557         return SDValue();
7558       continue;
7559     }
7560     assert(P->isMemory() && "provenance should either be memory or zero");
7561
7562     LoadSDNode *L = P->Load;
7563     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7564            !L->isIndexed() &&
7565            "Must be enforced by calculateByteProvider");
7566     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7567
7568     // All loads must share the same chain
7569     SDValue LChain = L->getChain();
7570     if (!Chain)
7571       Chain = LChain;
7572     else if (Chain != LChain)
7573       return SDValue();
7574
7575     // Loads must share the same base address
7576     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7577     int64_t ByteOffsetFromBase = 0;
7578     if (!Base)
7579       Base = Ptr;
7580     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7581       return SDValue();
7582
7583     // Calculate the offset of the current byte from the base address
7584     ByteOffsetFromBase += MemoryByteOffset(*P);
7585     ByteOffsets[i] = ByteOffsetFromBase;
7586
7587     // Remember the first byte load
7588     if (ByteOffsetFromBase < FirstOffset) {
7589       FirstByteProvider = P;
7590       FirstOffset = ByteOffsetFromBase;
7591     }
7592
7593     Loads.insert(L);
7594   }
7595   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7596          "memory, so there must be at least one load which produces the value");
7597   assert(Base && "Base address of the accessed memory location must be set");
7598   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7599
7600   bool NeedsZext = ZeroExtendedBytes > 0;
7601
7602   EVT MemVT =
7603       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7604
7605   if (!MemVT.isSimple())
7606     return SDValue();
7607
7608   // Before legalize we can introduce too wide illegal loads which will be later
7609   // split into legal sized loads. This enables us to combine i64 load by i8
7610   // patterns to a couple of i32 loads on 32 bit targets.
7611   if (LegalOperations &&
7612       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7613                             MemVT))
7614     return SDValue();
7615
7616   // Check if the bytes of the OR we are looking at match with either big or
7617   // little endian value load
7618   Optional<bool> IsBigEndian = isBigEndian(
7619       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7620   if (!IsBigEndian.hasValue())
7621     return SDValue();
7622
7623   assert(FirstByteProvider && "must be set");
7624
7625   // Ensure that the first byte is loaded from zero offset of the first load.
7626   // So the combined value can be loaded from the first load address.
7627   if (MemoryByteOffset(*FirstByteProvider) != 0)
7628     return SDValue();
7629   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7630
7631   // The node we are looking at matches with the pattern, check if we can
7632   // replace it with a single (possibly zero-extended) load and bswap + shift if
7633   // needed.
7634
7635   // If the load needs byte swap check if the target supports it
7636   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7637
7638   // Before legalize we can introduce illegal bswaps which will be later
7639   // converted to an explicit bswap sequence. This way we end up with a single
7640   // load and byte shuffling instead of several loads and byte shuffling.
7641   // We do not introduce illegal bswaps when zero-extending as this tends to
7642   // introduce too many arithmetic instructions.
7643   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7644       !TLI.isOperationLegal(ISD::BSWAP, VT))
7645     return SDValue();
7646
7647   // If we need to bswap and zero extend, we have to insert a shift. Check that
7648   // it is legal.
7649   if (NeedsBswap && NeedsZext && LegalOperations &&
7650       !TLI.isOperationLegal(ISD::SHL, VT))
7651     return SDValue();
7652
7653   // Check that a load of the wide type is both allowed and fast on the target
7654   bool Fast = false;
7655   bool Allowed =
7656       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7657                              *FirstLoad->getMemOperand(), &Fast);
7658   if (!Allowed || !Fast)
7659     return SDValue();
7660
7661   SDValue NewLoad =
7662       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7663                      Chain, FirstLoad->getBasePtr(),
7664                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7665
7666   // Transfer chain users from old loads to the new load.
7667   for (LoadSDNode *L : Loads)
7668     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7669
7670   if (!NeedsBswap)
7671     return NewLoad;
7672
7673   SDValue ShiftedLoad =
7674       NeedsZext
7675           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7676                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7677                                                    SDLoc(N), LegalOperations))
7678           : NewLoad;
7679   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7680 }
7681
7682 // If the target has andn, bsl, or a similar bit-select instruction,
7683 // we want to unfold masked merge, with canonical pattern of:
7684 //   |        A  |  |B|
7685 //   ((x ^ y) & m) ^ y
7686 //    |  D  |
7687 // Into:
7688 //   (x & m) | (y & ~m)
7689 // If y is a constant, and the 'andn' does not work with immediates,
7690 // we unfold into a different pattern:
7691 //   ~(~x & m) & (m | y)
7692 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7693 //       the very least that breaks andnpd / andnps patterns, and because those
7694 //       patterns are simplified in IR and shouldn't be created in the DAG
7695 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7696   assert(N->getOpcode() == ISD::XOR);
7697
7698   // Don't touch 'not' (i.e. where y = -1).
7699   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7700     return SDValue();
7701
7702   EVT VT = N->getValueType(0);
7703
7704   // There are 3 commutable operators in the pattern,
7705   // so we have to deal with 8 possible variants of the basic pattern.
7706   SDValue X, Y, M;
7707   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7708     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7709       return false;
7710     SDValue Xor = And.getOperand(XorIdx);
7711     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7712       return false;
7713     SDValue Xor0 = Xor.getOperand(0);
7714     SDValue Xor1 = Xor.getOperand(1);
7715     // Don't touch 'not' (i.e. where y = -1).
7716     if (isAllOnesOrAllOnesSplat(Xor1))
7717       return false;
7718     if (Other == Xor0)
7719       std::swap(Xor0, Xor1);
7720     if (Other != Xor1)
7721       return false;
7722     X = Xor0;
7723     Y = Xor1;
7724     M = And.getOperand(XorIdx ? 0 : 1);
7725     return true;
7726   };
7727
7728   SDValue N0 = N->getOperand(0);
7729   SDValue N1 = N->getOperand(1);
7730   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7731       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7732     return SDValue();
7733
7734   // Don't do anything if the mask is constant. This should not be reachable.
7735   // InstCombine should have already unfolded this pattern, and DAGCombiner
7736   // probably shouldn't produce it, too.
7737   if (isa<ConstantSDNode>(M.getNode()))
7738     return SDValue();
7739
7740   // We can transform if the target has AndNot
7741   if (!TLI.hasAndNot(M))
7742     return SDValue();
7743
7744   SDLoc DL(N);
7745
7746   // If Y is a constant, check that 'andn' works with immediates.
7747   if (!TLI.hasAndNot(Y)) {
7748     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7749     // If not, we need to do a bit more work to make sure andn is still used.
7750     SDValue NotX = DAG.getNOT(DL, X, VT);
7751     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7752     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7753     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7754     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7755   }
7756
7757   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7758   SDValue NotM = DAG.getNOT(DL, M, VT);
7759   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7760
7761   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7762 }
7763
7764 SDValue DAGCombiner::visitXOR(SDNode *N) {
7765   SDValue N0 = N->getOperand(0);
7766   SDValue N1 = N->getOperand(1);
7767   EVT VT = N0.getValueType();
7768
7769   // fold vector ops
7770   if (VT.isVector()) {
7771     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7772       return FoldedVOp;
7773
7774     // fold (xor x, 0) -> x, vector edition
7775     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7776       return N1;
7777     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7778       return N0;
7779   }
7780
7781   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7782   SDLoc DL(N);
7783   if (N0.isUndef() && N1.isUndef())
7784     return DAG.getConstant(0, DL, VT);
7785
7786   // fold (xor x, undef) -> undef
7787   if (N0.isUndef())
7788     return N0;
7789   if (N1.isUndef())
7790     return N1;
7791
7792   // fold (xor c1, c2) -> c1^c2
7793   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7794     return C;
7795
7796   // canonicalize constant to RHS
7797   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7798      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7799     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7800
7801   // fold (xor x, 0) -> x
7802   if (isNullConstant(N1))
7803     return N0;
7804
7805   if (SDValue NewSel = foldBinOpIntoSelect(N))
7806     return NewSel;
7807
7808   // reassociate xor
7809   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7810     return RXOR;
7811
7812   // fold !(x cc y) -> (x !cc y)
7813   unsigned N0Opcode = N0.getOpcode();
7814   SDValue LHS, RHS, CC;
7815   if (TLI.isConstTrueVal(N1.getNode()) &&
7816       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7817     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7818                                                LHS.getValueType());
7819     if (!LegalOperations ||
7820         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7821       switch (N0Opcode) {
7822       default:
7823         llvm_unreachable("Unhandled SetCC Equivalent!");
7824       case ISD::SETCC:
7825         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7826       case ISD::SELECT_CC:
7827         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7828                                N0.getOperand(3), NotCC);
7829       case ISD::STRICT_FSETCC:
7830       case ISD::STRICT_FSETCCS: {
7831         if (N0.hasOneUse()) {
7832           // FIXME Can we handle multiple uses? Could we token factor the chain
7833           // results from the new/old setcc?
7834           SDValue SetCC =
7835               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7836                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7837           CombineTo(N, SetCC);
7838           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7839           recursivelyDeleteUnusedNodes(N0.getNode());
7840           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7841         }
7842         break;
7843       }
7844       }
7845     }
7846   }
7847
7848   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7849   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7850       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7851     SDValue V = N0.getOperand(0);
7852     SDLoc DL0(N0);
7853     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7854                     DAG.getConstant(1, DL0, V.getValueType()));
7855     AddToWorklist(V.getNode());
7856     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7857   }
7858
7859   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7860   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7861       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7862     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7863     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7864       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7865       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7866       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7867       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7868       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7869     }
7870   }
7871   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7872   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7873       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7874     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7875     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7876       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7877       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7878       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7879       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7880       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7881     }
7882   }
7883
7884   // fold (not (neg x)) -> (add X, -1)
7885   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7886   // Y is a constant or the subtract has a single use.
7887   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7888       isNullConstant(N0.getOperand(0))) {
7889     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7890                        DAG.getAllOnesConstant(DL, VT));
7891   }
7892
7893   // fold (not (add X, -1)) -> (neg X)
7894   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7895       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7896     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7897                        N0.getOperand(0));
7898   }
7899
7900   // fold (xor (and x, y), y) -> (and (not x), y)
7901   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7902     SDValue X = N0.getOperand(0);
7903     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7904     AddToWorklist(NotX.getNode());
7905     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7906   }
7907
7908   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7909     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7910     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7911     unsigned BitWidth = VT.getScalarSizeInBits();
7912     if (XorC && ShiftC) {
7913       // Don't crash on an oversized shift. We can not guarantee that a bogus
7914       // shift has been simplified to undef.
7915       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7916       if (ShiftAmt < BitWidth) {
7917         APInt Ones = APInt::getAllOnesValue(BitWidth);
7918         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7919         if (XorC->getAPIntValue() == Ones) {
7920           // If the xor constant is a shifted -1, do a 'not' before the shift:
7921           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7922           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7923           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7924           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7925         }
7926       }
7927     }
7928   }
7929
7930   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7931   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7932     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7933     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7934     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7935       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7936       SDValue S0 = S.getOperand(0);
7937       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7938         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7939           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7940             return DAG.getNode(ISD::ABS, DL, VT, S0);
7941     }
7942   }
7943
7944   // fold (xor x, x) -> 0
7945   if (N0 == N1)
7946     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7947
7948   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7949   // Here is a concrete example of this equivalence:
7950   // i16   x ==  14
7951   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7952   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7953   //
7954   // =>
7955   //
7956   // i16     ~1      == 0b1111111111111110
7957   // i16 rol(~1, 14) == 0b1011111111111111
7958   //
7959   // Some additional tips to help conceptualize this transform:
7960   // - Try to see the operation as placing a single zero in a value of all ones.
7961   // - There exists no value for x which would allow the result to contain zero.
7962   // - Values of x larger than the bitwidth are undefined and do not require a
7963   //   consistent result.
7964   // - Pushing the zero left requires shifting one bits in from the right.
7965   // A rotate left of ~1 is a nice way of achieving the desired result.
7966   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7967       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7968     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7969                        N0.getOperand(1));
7970   }
7971
7972   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7973   if (N0Opcode == N1.getOpcode())
7974     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7975       return V;
7976
7977   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7978   if (SDValue MM = unfoldMaskedMerge(N))
7979     return MM;
7980
7981   // Simplify the expression using non-local knowledge.
7982   if (SimplifyDemandedBits(SDValue(N, 0)))
7983     return SDValue(N, 0);
7984
7985   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7986     return Combined;
7987
7988   return SDValue();
7989 }
7990
7991 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7992 /// shift-by-constant operand with identical opcode, we may be able to convert
7993 /// that into 2 independent shifts followed by the logic op. This is a
7994 /// throughput improvement.
7995 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7996   // Match a one-use bitwise logic op.
7997   SDValue LogicOp = Shift->getOperand(0);
7998   if (!LogicOp.hasOneUse())
7999     return SDValue();
8000
8001   unsigned LogicOpcode = LogicOp.getOpcode();
8002   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8003       LogicOpcode != ISD::XOR)
8004     return SDValue();
8005
8006   // Find a matching one-use shift by constant.
8007   unsigned ShiftOpcode = Shift->getOpcode();
8008   SDValue C1 = Shift->getOperand(1);
8009   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8010   assert(C1Node && "Expected a shift with constant operand");
8011   const APInt &C1Val = C1Node->getAPIntValue();
8012   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8013                              const APInt *&ShiftAmtVal) {
8014     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8015       return false;
8016
8017     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8018     if (!ShiftCNode)
8019       return false;
8020
8021     // Capture the shifted operand and shift amount value.
8022     ShiftOp = V.getOperand(0);
8023     ShiftAmtVal = &ShiftCNode->getAPIntValue();
8024
8025     // Shift amount types do not have to match their operand type, so check that
8026     // the constants are the same width.
8027     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8028       return false;
8029
8030     // The fold is not valid if the sum of the shift values exceeds bitwidth.
8031     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8032       return false;
8033
8034     return true;
8035   };
8036
8037   // Logic ops are commutative, so check each operand for a match.
8038   SDValue X, Y;
8039   const APInt *C0Val;
8040   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8041     Y = LogicOp.getOperand(1);
8042   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8043     Y = LogicOp.getOperand(0);
8044   else
8045     return SDValue();
8046
8047   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8048   SDLoc DL(Shift);
8049   EVT VT = Shift->getValueType(0);
8050   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8051   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8052   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8053   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8054   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8055 }
8056
8057 /// Handle transforms common to the three shifts, when the shift amount is a
8058 /// constant.
8059 /// We are looking for: (shift being one of shl/sra/srl)
8060 ///   shift (binop X, C0), C1
8061 /// And want to transform into:
8062 ///   binop (shift X, C1), (shift C0, C1)
8063 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8064   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8065
8066   // Do not turn a 'not' into a regular xor.
8067   if (isBitwiseNot(N->getOperand(0)))
8068     return SDValue();
8069
8070   // The inner binop must be one-use, since we want to replace it.
8071   SDValue LHS = N->getOperand(0);
8072   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8073     return SDValue();
8074
8075   // TODO: This is limited to early combining because it may reveal regressions
8076   //       otherwise. But since we just checked a target hook to see if this is
8077   //       desirable, that should have filtered out cases where this interferes
8078   //       with some other pattern matching.
8079   if (!LegalTypes)
8080     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8081       return R;
8082
8083   // We want to pull some binops through shifts, so that we have (and (shift))
8084   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8085   // thing happens with address calculations, so it's important to canonicalize
8086   // it.
8087   switch (LHS.getOpcode()) {
8088   default:
8089     return SDValue();
8090   case ISD::OR:
8091   case ISD::XOR:
8092   case ISD::AND:
8093     break;
8094   case ISD::ADD:
8095     if (N->getOpcode() != ISD::SHL)
8096       return SDValue(); // only shl(add) not sr[al](add).
8097     break;
8098   }
8099
8100   // We require the RHS of the binop to be a constant and not opaque as well.
8101   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8102   if (!BinOpCst)
8103     return SDValue();
8104
8105   // FIXME: disable this unless the input to the binop is a shift by a constant
8106   // or is copy/select. Enable this in other cases when figure out it's exactly
8107   // profitable.
8108   SDValue BinOpLHSVal = LHS.getOperand(0);
8109   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8110                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8111                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8112                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8113   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8114                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8115
8116   if (!IsShiftByConstant && !IsCopyOrSelect)
8117     return SDValue();
8118
8119   if (IsCopyOrSelect && N->hasOneUse())
8120     return SDValue();
8121
8122   // Fold the constants, shifting the binop RHS by the shift amount.
8123   SDLoc DL(N);
8124   EVT VT = N->getValueType(0);
8125   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8126                                N->getOperand(1));
8127   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8128
8129   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8130                                  N->getOperand(1));
8131   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8132 }
8133
8134 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8135   assert(N->getOpcode() == ISD::TRUNCATE);
8136   assert(N->getOperand(0).getOpcode() == ISD::AND);
8137
8138   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8139   EVT TruncVT = N->getValueType(0);
8140   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8141       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8142     SDValue N01 = N->getOperand(0).getOperand(1);
8143     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8144       SDLoc DL(N);
8145       SDValue N00 = N->getOperand(0).getOperand(0);
8146       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8147       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8148       AddToWorklist(Trunc00.getNode());
8149       AddToWorklist(Trunc01.getNode());
8150       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8151     }
8152   }
8153
8154   return SDValue();
8155 }
8156
8157 SDValue DAGCombiner::visitRotate(SDNode *N) {
8158   SDLoc dl(N);
8159   SDValue N0 = N->getOperand(0);
8160   SDValue N1 = N->getOperand(1);
8161   EVT VT = N->getValueType(0);
8162   unsigned Bitsize = VT.getScalarSizeInBits();
8163
8164   // fold (rot x, 0) -> x
8165   if (isNullOrNullSplat(N1))
8166     return N0;
8167
8168   // fold (rot x, c) -> x iff (c % BitSize) == 0
8169   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8170     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8171     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8172       return N0;
8173   }
8174
8175   // fold (rot x, c) -> (rot x, c % BitSize)
8176   bool OutOfRange = false;
8177   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8178     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8179     return true;
8180   };
8181   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8182     EVT AmtVT = N1.getValueType();
8183     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8184     if (SDValue Amt =
8185             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8186       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8187   }
8188
8189   // rot i16 X, 8 --> bswap X
8190   auto *RotAmtC = isConstOrConstSplat(N1);
8191   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8192       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8193     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8194
8195   // Simplify the operands using demanded-bits information.
8196   if (SimplifyDemandedBits(SDValue(N, 0)))
8197     return SDValue(N, 0);
8198
8199   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8200   if (N1.getOpcode() == ISD::TRUNCATE &&
8201       N1.getOperand(0).getOpcode() == ISD::AND) {
8202     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8203       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8204   }
8205
8206   unsigned NextOp = N0.getOpcode();
8207   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
8208   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8209     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8210     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8211     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8212       EVT ShiftVT = C1->getValueType(0);
8213       bool SameSide = (N->getOpcode() == NextOp);
8214       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8215       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8216               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8217         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8218         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8219             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8220         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8221                            CombinedShiftNorm);
8222       }
8223     }
8224   }
8225   return SDValue();
8226 }
8227
8228 SDValue DAGCombiner::visitSHL(SDNode *N) {
8229   SDValue N0 = N->getOperand(0);
8230   SDValue N1 = N->getOperand(1);
8231   if (SDValue V = DAG.simplifyShift(N0, N1))
8232     return V;
8233
8234   EVT VT = N0.getValueType();
8235   EVT ShiftVT = N1.getValueType();
8236   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8237
8238   // fold vector ops
8239   if (VT.isVector()) {
8240     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8241       return FoldedVOp;
8242
8243     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8244     // If setcc produces all-one true value then:
8245     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8246     if (N1CV && N1CV->isConstant()) {
8247       if (N0.getOpcode() == ISD::AND) {
8248         SDValue N00 = N0->getOperand(0);
8249         SDValue N01 = N0->getOperand(1);
8250         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8251
8252         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8253             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8254                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8255           if (SDValue C =
8256                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8257             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8258         }
8259       }
8260     }
8261   }
8262
8263   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8264
8265   // fold (shl c1, c2) -> c1<<c2
8266   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8267     return C;
8268
8269   if (SDValue NewSel = foldBinOpIntoSelect(N))
8270     return NewSel;
8271
8272   // if (shl x, c) is known to be zero, return 0
8273   if (DAG.MaskedValueIsZero(SDValue(N, 0),
8274                             APInt::getAllOnesValue(OpSizeInBits)))
8275     return DAG.getConstant(0, SDLoc(N), VT);
8276
8277   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8278   if (N1.getOpcode() == ISD::TRUNCATE &&
8279       N1.getOperand(0).getOpcode() == ISD::AND) {
8280     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8281       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8282   }
8283
8284   if (SimplifyDemandedBits(SDValue(N, 0)))
8285     return SDValue(N, 0);
8286
8287   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8288   if (N0.getOpcode() == ISD::SHL) {
8289     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290                                           ConstantSDNode *RHS) {
8291       APInt c1 = LHS->getAPIntValue();
8292       APInt c2 = RHS->getAPIntValue();
8293       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294       return (c1 + c2).uge(OpSizeInBits);
8295     };
8296     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297       return DAG.getConstant(0, SDLoc(N), VT);
8298
8299     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300                                        ConstantSDNode *RHS) {
8301       APInt c1 = LHS->getAPIntValue();
8302       APInt c2 = RHS->getAPIntValue();
8303       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304       return (c1 + c2).ult(OpSizeInBits);
8305     };
8306     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8307       SDLoc DL(N);
8308       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8309       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8310     }
8311   }
8312
8313   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8314   // For this to be valid, the second form must not preserve any of the bits
8315   // that are shifted out by the inner shift in the first form.  This means
8316   // the outer shift size must be >= the number of bits added by the ext.
8317   // As a corollary, we don't care what kind of ext it is.
8318   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8319        N0.getOpcode() == ISD::ANY_EXTEND ||
8320        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8321       N0.getOperand(0).getOpcode() == ISD::SHL) {
8322     SDValue N0Op0 = N0.getOperand(0);
8323     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8324     EVT InnerVT = N0Op0.getValueType();
8325     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8326
8327     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8328                                                          ConstantSDNode *RHS) {
8329       APInt c1 = LHS->getAPIntValue();
8330       APInt c2 = RHS->getAPIntValue();
8331       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8332       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8333              (c1 + c2).uge(OpSizeInBits);
8334     };
8335     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8336                                   /*AllowUndefs*/ false,
8337                                   /*AllowTypeMismatch*/ true))
8338       return DAG.getConstant(0, SDLoc(N), VT);
8339
8340     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8341                                                       ConstantSDNode *RHS) {
8342       APInt c1 = LHS->getAPIntValue();
8343       APInt c2 = RHS->getAPIntValue();
8344       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8345       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8346              (c1 + c2).ult(OpSizeInBits);
8347     };
8348     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8349                                   /*AllowUndefs*/ false,
8350                                   /*AllowTypeMismatch*/ true)) {
8351       SDLoc DL(N);
8352       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8353       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8354       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8355       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8356     }
8357   }
8358
8359   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8360   // Only fold this if the inner zext has no other uses to avoid increasing
8361   // the total number of instructions.
8362   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8363       N0.getOperand(0).getOpcode() == ISD::SRL) {
8364     SDValue N0Op0 = N0.getOperand(0);
8365     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8366
8367     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8368       APInt c1 = LHS->getAPIntValue();
8369       APInt c2 = RHS->getAPIntValue();
8370       zeroExtendToMatch(c1, c2);
8371       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8372     };
8373     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8374                                   /*AllowUndefs*/ false,
8375                                   /*AllowTypeMismatch*/ true)) {
8376       SDLoc DL(N);
8377       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8378       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8379       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8380       AddToWorklist(NewSHL.getNode());
8381       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8382     }
8383   }
8384
8385   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8386   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8387   // TODO - support non-uniform vector shift amounts.
8388   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8389       N0->getFlags().hasExact()) {
8390     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8391       uint64_t C1 = N0C1->getZExtValue();
8392       uint64_t C2 = N1C->getZExtValue();
8393       SDLoc DL(N);
8394       if (C1 <= C2)
8395         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8396                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8397       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8398                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8399     }
8400   }
8401
8402   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
8403   //                               (and (srl x, (sub c1, c2), MASK)
8404   // Only fold this if the inner shift has no other uses -- if it does, folding
8405   // this will increase the total number of instructions.
8406   // TODO - drop hasOneUse requirement if c1 == c2?
8407   // TODO - support non-uniform vector shift amounts.
8408   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8409       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8410     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8411       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8412         uint64_t c1 = N0C1->getZExtValue();
8413         uint64_t c2 = N1C->getZExtValue();
8414         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8415         SDValue Shift;
8416         if (c2 > c1) {
8417           Mask <<= c2 - c1;
8418           SDLoc DL(N);
8419           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8420                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8421         } else {
8422           Mask.lshrInPlace(c1 - c2);
8423           SDLoc DL(N);
8424           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8425                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8426         }
8427         SDLoc DL(N0);
8428         return DAG.getNode(ISD::AND, DL, VT, Shift,
8429                            DAG.getConstant(Mask, DL, VT));
8430       }
8431     }
8432   }
8433
8434   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8435   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8436       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8437     SDLoc DL(N);
8438     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8439     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8440     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8441   }
8442
8443   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8444   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8445   // Variant of version done on multiply, except mul by a power of 2 is turned
8446   // into a shift.
8447   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8448       N0.getNode()->hasOneUse() &&
8449       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8450       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8451       TLI.isDesirableToCommuteWithShift(N, Level)) {
8452     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8453     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8454     AddToWorklist(Shl0.getNode());
8455     AddToWorklist(Shl1.getNode());
8456     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8457   }
8458
8459   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8460   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8461       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8462       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8463     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8464     if (isConstantOrConstantVector(Shl))
8465       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8466   }
8467
8468   if (N1C && !N1C->isOpaque())
8469     if (SDValue NewSHL = visitShiftByConstant(N))
8470       return NewSHL;
8471
8472   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8473   if (N0.getOpcode() == ISD::VSCALE)
8474     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8475       const APInt &C0 = N0.getConstantOperandAPInt(0);
8476       const APInt &C1 = NC1->getAPIntValue();
8477       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8478     }
8479
8480   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8481   APInt ShlVal;
8482   if (N0.getOpcode() == ISD::STEP_VECTOR)
8483     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8484       const APInt &C0 = N0.getConstantOperandAPInt(0);
8485       if (ShlVal.ult(C0.getBitWidth())) {
8486         APInt NewStep = C0 << ShlVal;
8487         return DAG.getStepVector(SDLoc(N), VT, NewStep);
8488       }
8489     }
8490
8491   return SDValue();
8492 }
8493
8494 // Transform a right shift of a multiply into a multiply-high.
8495 // Examples:
8496 // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
8497 // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
8498 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8499                                   const TargetLowering &TLI) {
8500   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8501          "SRL or SRA node is required here!");
8502
8503   // Check the shift amount. Proceed with the transformation if the shift
8504   // amount is constant.
8505   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8506   if (!ShiftAmtSrc)
8507     return SDValue();
8508
8509   SDLoc DL(N);
8510
8511   // The operation feeding into the shift must be a multiply.
8512   SDValue ShiftOperand = N->getOperand(0);
8513   if (ShiftOperand.getOpcode() != ISD::MUL)
8514     return SDValue();
8515
8516   // Both operands must be equivalent extend nodes.
8517   SDValue LeftOp = ShiftOperand.getOperand(0);
8518   SDValue RightOp = ShiftOperand.getOperand(1);
8519   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8520   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8521
8522   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8523     return SDValue();
8524
8525   EVT WideVT1 = LeftOp.getValueType();
8526   EVT WideVT2 = RightOp.getValueType();
8527   (void)WideVT2;
8528   // Proceed with the transformation if the wide types match.
8529   assert((WideVT1 == WideVT2) &&
8530          "Cannot have a multiply node with two different operand types.");
8531
8532   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8533   // Check that the two extend nodes are the same type.
8534   if (NarrowVT !=  RightOp.getOperand(0).getValueType())
8535     return SDValue();
8536
8537   // Proceed with the transformation if the wide type is twice as large
8538   // as the narrow type.
8539   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8540   if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8541     return SDValue();
8542
8543   // Check the shift amount with the narrow type size.
8544   // Proceed with the transformation if the shift amount is the width
8545   // of the narrow type.
8546   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8547   if (ShiftAmt != NarrowVTSize)
8548     return SDValue();
8549
8550   // If the operation feeding into the MUL is a sign extend (sext),
8551   // we use mulhs. Othewise, zero extends (zext) use mulhu.
8552   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8553
8554   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8555   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8556     return SDValue();
8557
8558   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8559                                RightOp.getOperand(0));
8560   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8561                                      : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8562 }
8563
8564 SDValue DAGCombiner::visitSRA(SDNode *N) {
8565   SDValue N0 = N->getOperand(0);
8566   SDValue N1 = N->getOperand(1);
8567   if (SDValue V = DAG.simplifyShift(N0, N1))
8568     return V;
8569
8570   EVT VT = N0.getValueType();
8571   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8572
8573   // Arithmetic shifting an all-sign-bit value is a no-op.
8574   // fold (sra 0, x) -> 0
8575   // fold (sra -1, x) -> -1
8576   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8577     return N0;
8578
8579   // fold vector ops
8580   if (VT.isVector())
8581     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8582       return FoldedVOp;
8583
8584   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8585
8586   // fold (sra c1, c2) -> (sra c1, c2)
8587   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8588     return C;
8589
8590   if (SDValue NewSel = foldBinOpIntoSelect(N))
8591     return NewSel;
8592
8593   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8594   // sext_inreg.
8595   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8596     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8597     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8598     if (VT.isVector())
8599       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8600                                VT.getVectorElementCount());
8601     if (!LegalOperations ||
8602         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8603         TargetLowering::Legal)
8604       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8605                          N0.getOperand(0), DAG.getValueType(ExtVT));
8606     // Even if we can't convert to sext_inreg, we might be able to remove
8607     // this shift pair if the input is already sign extended.
8608     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8609       return N0.getOperand(0);
8610   }
8611
8612   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8613   // clamp (add c1, c2) to max shift.
8614   if (N0.getOpcode() == ISD::SRA) {
8615     SDLoc DL(N);
8616     EVT ShiftVT = N1.getValueType();
8617     EVT ShiftSVT = ShiftVT.getScalarType();
8618     SmallVector<SDValue, 16> ShiftValues;
8619
8620     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8621       APInt c1 = LHS->getAPIntValue();
8622       APInt c2 = RHS->getAPIntValue();
8623       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8624       APInt Sum = c1 + c2;
8625       unsigned ShiftSum =
8626           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8627       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8628       return true;
8629     };
8630     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8631       SDValue ShiftValue;
8632       if (N1.getOpcode() == ISD::BUILD_VECTOR)
8633         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8634       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
8635         assert(ShiftValues.size() == 1 &&
8636                "Expected matchBinaryPredicate to return one element for "
8637                "SPLAT_VECTORs");
8638         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
8639       } else
8640         ShiftValue = ShiftValues[0];
8641       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8642     }
8643   }
8644
8645   // fold (sra (shl X, m), (sub result_size, n))
8646   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8647   // result_size - n != m.
8648   // If truncate is free for the target sext(shl) is likely to result in better
8649   // code.
8650   if (N0.getOpcode() == ISD::SHL && N1C) {
8651     // Get the two constanst of the shifts, CN0 = m, CN = n.
8652     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8653     if (N01C) {
8654       LLVMContext &Ctx = *DAG.getContext();
8655       // Determine what the truncate's result bitsize and type would be.
8656       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8657
8658       if (VT.isVector())
8659         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8660
8661       // Determine the residual right-shift amount.
8662       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8663
8664       // If the shift is not a no-op (in which case this should be just a sign
8665       // extend already), the truncated to type is legal, sign_extend is legal
8666       // on that type, and the truncate to that type is both legal and free,
8667       // perform the transform.
8668       if ((ShiftAmt > 0) &&
8669           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8670           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8671           TLI.isTruncateFree(VT, TruncVT)) {
8672         SDLoc DL(N);
8673         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8674             getShiftAmountTy(N0.getOperand(0).getValueType()));
8675         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8676                                     N0.getOperand(0), Amt);
8677         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8678                                     Shift);
8679         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8680                            N->getValueType(0), Trunc);
8681       }
8682     }
8683   }
8684
8685   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8686   //   sra (add (shl X, N1C), AddC), N1C -->
8687   //   sext (add (trunc X to (width - N1C)), AddC')
8688   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8689       N0.getOperand(0).getOpcode() == ISD::SHL &&
8690       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8691     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8692       SDValue Shl = N0.getOperand(0);
8693       // Determine what the truncate's type would be and ask the target if that
8694       // is a free operation.
8695       LLVMContext &Ctx = *DAG.getContext();
8696       unsigned ShiftAmt = N1C->getZExtValue();
8697       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8698       if (VT.isVector())
8699         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8700
8701       // TODO: The simple type check probably belongs in the default hook
8702       //       implementation and/or target-specific overrides (because
8703       //       non-simple types likely require masking when legalized), but that
8704       //       restriction may conflict with other transforms.
8705       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8706           TLI.isTruncateFree(VT, TruncVT)) {
8707         SDLoc DL(N);
8708         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8709         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8710                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8711         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8712         return DAG.getSExtOrTrunc(Add, DL, VT);
8713       }
8714     }
8715   }
8716
8717   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8718   if (N1.getOpcode() == ISD::TRUNCATE &&
8719       N1.getOperand(0).getOpcode() == ISD::AND) {
8720     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8721       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8722   }
8723
8724   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8725   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8726   //      if c1 is equal to the number of bits the trunc removes
8727   // TODO - support non-uniform vector shift amounts.
8728   if (N0.getOpcode() == ISD::TRUNCATE &&
8729       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8730        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8731       N0.getOperand(0).hasOneUse() &&
8732       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8733     SDValue N0Op0 = N0.getOperand(0);
8734     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8735       EVT LargeVT = N0Op0.getValueType();
8736       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8737       if (LargeShift->getAPIntValue() == TruncBits) {
8738         SDLoc DL(N);
8739         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8740                                       getShiftAmountTy(LargeVT));
8741         SDValue SRA =
8742             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8743         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8744       }
8745     }
8746   }
8747
8748   // Simplify, based on bits shifted out of the LHS.
8749   if (SimplifyDemandedBits(SDValue(N, 0)))
8750     return SDValue(N, 0);
8751
8752   // If the sign bit is known to be zero, switch this to a SRL.
8753   if (DAG.SignBitIsZero(N0))
8754     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8755
8756   if (N1C && !N1C->isOpaque())
8757     if (SDValue NewSRA = visitShiftByConstant(N))
8758       return NewSRA;
8759
8760   // Try to transform this shift into a multiply-high if
8761   // it matches the appropriate pattern detected in combineShiftToMULH.
8762   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8763     return MULH;
8764
8765   return SDValue();
8766 }
8767
8768 SDValue DAGCombiner::visitSRL(SDNode *N) {
8769   SDValue N0 = N->getOperand(0);
8770   SDValue N1 = N->getOperand(1);
8771   if (SDValue V = DAG.simplifyShift(N0, N1))
8772     return V;
8773
8774   EVT VT = N0.getValueType();
8775   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8776
8777   // fold vector ops
8778   if (VT.isVector())
8779     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8780       return FoldedVOp;
8781
8782   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8783
8784   // fold (srl c1, c2) -> c1 >>u c2
8785   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8786     return C;
8787
8788   if (SDValue NewSel = foldBinOpIntoSelect(N))
8789     return NewSel;
8790
8791   // if (srl x, c) is known to be zero, return 0
8792   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8793                                    APInt::getAllOnesValue(OpSizeInBits)))
8794     return DAG.getConstant(0, SDLoc(N), VT);
8795
8796   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8797   if (N0.getOpcode() == ISD::SRL) {
8798     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8799                                           ConstantSDNode *RHS) {
8800       APInt c1 = LHS->getAPIntValue();
8801       APInt c2 = RHS->getAPIntValue();
8802       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8803       return (c1 + c2).uge(OpSizeInBits);
8804     };
8805     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8806       return DAG.getConstant(0, SDLoc(N), VT);
8807
8808     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8809                                        ConstantSDNode *RHS) {
8810       APInt c1 = LHS->getAPIntValue();
8811       APInt c2 = RHS->getAPIntValue();
8812       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8813       return (c1 + c2).ult(OpSizeInBits);
8814     };
8815     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8816       SDLoc DL(N);
8817       EVT ShiftVT = N1.getValueType();
8818       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8819       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8820     }
8821   }
8822
8823   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8824       N0.getOperand(0).getOpcode() == ISD::SRL) {
8825     SDValue InnerShift = N0.getOperand(0);
8826     // TODO - support non-uniform vector shift amounts.
8827     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8828       uint64_t c1 = N001C->getZExtValue();
8829       uint64_t c2 = N1C->getZExtValue();
8830       EVT InnerShiftVT = InnerShift.getValueType();
8831       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8832       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8833       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8834       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8835       if (c1 + OpSizeInBits == InnerShiftSize) {
8836         SDLoc DL(N);
8837         if (c1 + c2 >= InnerShiftSize)
8838           return DAG.getConstant(0, DL, VT);
8839         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8840         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8841                                        InnerShift.getOperand(0), NewShiftAmt);
8842         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8843       }
8844       // In the more general case, we can clear the high bits after the shift:
8845       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8846       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8847           c1 + c2 < InnerShiftSize) {
8848         SDLoc DL(N);
8849         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8850         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8851                                        InnerShift.getOperand(0), NewShiftAmt);
8852         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8853                                                             OpSizeInBits - c2),
8854                                        DL, InnerShiftVT);
8855         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8856         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8857       }
8858     }
8859   }
8860
8861   // fold (srl (shl x, c), c) -> (and x, cst2)
8862   // TODO - (srl (shl x, c1), c2).
8863   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8864       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8865     SDLoc DL(N);
8866     SDValue Mask =
8867         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8868     AddToWorklist(Mask.getNode());
8869     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8870   }
8871
8872   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8873   // TODO - support non-uniform vector shift amounts.
8874   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8875     // Shifting in all undef bits?
8876     EVT SmallVT = N0.getOperand(0).getValueType();
8877     unsigned BitSize = SmallVT.getScalarSizeInBits();
8878     if (N1C->getAPIntValue().uge(BitSize))
8879       return DAG.getUNDEF(VT);
8880
8881     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8882       uint64_t ShiftAmt = N1C->getZExtValue();
8883       SDLoc DL0(N0);
8884       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8885                                        N0.getOperand(0),
8886                           DAG.getConstant(ShiftAmt, DL0,
8887                                           getShiftAmountTy(SmallVT)));
8888       AddToWorklist(SmallShift.getNode());
8889       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8890       SDLoc DL(N);
8891       return DAG.getNode(ISD::AND, DL, VT,
8892                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8893                          DAG.getConstant(Mask, DL, VT));
8894     }
8895   }
8896
8897   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8898   // bit, which is unmodified by sra.
8899   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8900     if (N0.getOpcode() == ISD::SRA)
8901       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8902   }
8903
8904   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
8905   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8906       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8907     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8908
8909     // If any of the input bits are KnownOne, then the input couldn't be all
8910     // zeros, thus the result of the srl will always be zero.
8911     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8912
8913     // If all of the bits input the to ctlz node are known to be zero, then
8914     // the result of the ctlz is "32" and the result of the shift is one.
8915     APInt UnknownBits = ~Known.Zero;
8916     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8917
8918     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8919     if (UnknownBits.isPowerOf2()) {
8920       // Okay, we know that only that the single bit specified by UnknownBits
8921       // could be set on input to the CTLZ node. If this bit is set, the SRL
8922       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8923       // to an SRL/XOR pair, which is likely to simplify more.
8924       unsigned ShAmt = UnknownBits.countTrailingZeros();
8925       SDValue Op = N0.getOperand(0);
8926
8927       if (ShAmt) {
8928         SDLoc DL(N0);
8929         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8930                   DAG.getConstant(ShAmt, DL,
8931                                   getShiftAmountTy(Op.getValueType())));
8932         AddToWorklist(Op.getNode());
8933       }
8934
8935       SDLoc DL(N);
8936       return DAG.getNode(ISD::XOR, DL, VT,
8937                          Op, DAG.getConstant(1, DL, VT));
8938     }
8939   }
8940
8941   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8942   if (N1.getOpcode() == ISD::TRUNCATE &&
8943       N1.getOperand(0).getOpcode() == ISD::AND) {
8944     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8945       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8946   }
8947
8948   // fold operands of srl based on knowledge that the low bits are not
8949   // demanded.
8950   if (SimplifyDemandedBits(SDValue(N, 0)))
8951     return SDValue(N, 0);
8952
8953   if (N1C && !N1C->isOpaque())
8954     if (SDValue NewSRL = visitShiftByConstant(N))
8955       return NewSRL;
8956
8957   // Attempt to convert a srl of a load into a narrower zero-extending load.
8958   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8959     return NarrowLoad;
8960
8961   // Here is a common situation. We want to optimize:
8962   //
8963   //   %a = ...
8964   //   %b = and i32 %a, 2
8965   //   %c = srl i32 %b, 1
8966   //   brcond i32 %c ...
8967   //
8968   // into
8969   //
8970   //   %a = ...
8971   //   %b = and %a, 2
8972   //   %c = setcc eq %b, 0
8973   //   brcond %c ...
8974   //
8975   // However when after the source operand of SRL is optimized into AND, the SRL
8976   // itself may not be optimized further. Look for it and add the BRCOND into
8977   // the worklist.
8978   if (N->hasOneUse()) {
8979     SDNode *Use = *N->use_begin();
8980     if (Use->getOpcode() == ISD::BRCOND)
8981       AddToWorklist(Use);
8982     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8983       // Also look pass the truncate.
8984       Use = *Use->use_begin();
8985       if (Use->getOpcode() == ISD::BRCOND)
8986         AddToWorklist(Use);
8987     }
8988   }
8989
8990   // Try to transform this shift into a multiply-high if
8991   // it matches the appropriate pattern detected in combineShiftToMULH.
8992   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8993     return MULH;
8994
8995   return SDValue();
8996 }
8997
8998 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8999   EVT VT = N->getValueType(0);
9000   SDValue N0 = N->getOperand(0);
9001   SDValue N1 = N->getOperand(1);
9002   SDValue N2 = N->getOperand(2);
9003   bool IsFSHL = N->getOpcode() == ISD::FSHL;
9004   unsigned BitWidth = VT.getScalarSizeInBits();
9005
9006   // fold (fshl N0, N1, 0) -> N0
9007   // fold (fshr N0, N1, 0) -> N1
9008   if (isPowerOf2_32(BitWidth))
9009     if (DAG.MaskedValueIsZero(
9010             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9011       return IsFSHL ? N0 : N1;
9012
9013   auto IsUndefOrZero = [](SDValue V) {
9014     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9015   };
9016
9017   // TODO - support non-uniform vector shift amounts.
9018   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9019     EVT ShAmtTy = N2.getValueType();
9020
9021     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
9022     if (Cst->getAPIntValue().uge(BitWidth)) {
9023       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9024       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9025                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9026     }
9027
9028     unsigned ShAmt = Cst->getZExtValue();
9029     if (ShAmt == 0)
9030       return IsFSHL ? N0 : N1;
9031
9032     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9033     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9034     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9035     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
9036     if (IsUndefOrZero(N0))
9037       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9038                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9039                                          SDLoc(N), ShAmtTy));
9040     if (IsUndefOrZero(N1))
9041       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9042                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9043                                          SDLoc(N), ShAmtTy));
9044
9045     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9046     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9047     // TODO - bigendian support once we have test coverage.
9048     // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
9049     // TODO - permit LHS EXTLOAD if extensions are shifted out.
9050     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9051         !DAG.getDataLayout().isBigEndian()) {
9052       auto *LHS = dyn_cast<LoadSDNode>(N0);
9053       auto *RHS = dyn_cast<LoadSDNode>(N1);
9054       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9055           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9056           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9057           ISD::isNON_EXTLoad(LHS)) {
9058         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9059           SDLoc DL(RHS);
9060           uint64_t PtrOff =
9061               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9062           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9063           bool Fast = false;
9064           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9065                                      RHS->getAddressSpace(), NewAlign,
9066                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9067               Fast) {
9068             SDValue NewPtr = DAG.getMemBasePlusOffset(
9069                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9070             AddToWorklist(NewPtr.getNode());
9071             SDValue Load = DAG.getLoad(
9072                 VT, DL, RHS->getChain(), NewPtr,
9073                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9074                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9075             // Replace the old load's chain with the new load's chain.
9076             WorklistRemover DeadNodes(*this);
9077             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9078             return Load;
9079           }
9080         }
9081       }
9082     }
9083   }
9084
9085   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9086   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
9087   // iff We know the shift amount is in range.
9088   // TODO: when is it worth doing SUB(BW, N2) as well?
9089   if (isPowerOf2_32(BitWidth)) {
9090     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9091     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9092       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9093     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9094       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9095   }
9096
9097   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9098   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
9099   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
9100   // is legal as well we might be better off avoiding non-constant (BW - N2).
9101   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9102   if (N0 == N1 && hasOperation(RotOpc, VT))
9103     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9104
9105   // Simplify, based on bits shifted out of N0/N1.
9106   if (SimplifyDemandedBits(SDValue(N, 0)))
9107     return SDValue(N, 0);
9108
9109   return SDValue();
9110 }
9111
9112 // Given a ABS node, detect the following pattern:
9113 // (ABS (SUB (EXTEND a), (EXTEND b))).
9114 // Generates UABD/SABD instruction.
9115 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9116                                const TargetLowering &TLI) {
9117   SDValue AbsOp1 = N->getOperand(0);
9118   SDValue Op0, Op1;
9119
9120   if (AbsOp1.getOpcode() != ISD::SUB)
9121     return SDValue();
9122
9123   Op0 = AbsOp1.getOperand(0);
9124   Op1 = AbsOp1.getOperand(1);
9125
9126   unsigned Opc0 = Op0.getOpcode();
9127   // Check if the operands of the sub are (zero|sign)-extended.
9128   if (Opc0 != Op1.getOpcode() ||
9129       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9130     return SDValue();
9131
9132   EVT VT1 = Op0.getOperand(0).getValueType();
9133   EVT VT2 = Op1.getOperand(0).getValueType();
9134   // Check if the operands are of same type and valid size.
9135   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9136   if (VT1 != VT2 || !TLI.isOperationLegalOrCustom(ABDOpcode, VT1))
9137     return SDValue();
9138
9139   Op0 = Op0.getOperand(0);
9140   Op1 = Op1.getOperand(0);
9141   SDValue ABD =
9142       DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
9143   return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
9144 }
9145
9146 SDValue DAGCombiner::visitABS(SDNode *N) {
9147   SDValue N0 = N->getOperand(0);
9148   EVT VT = N->getValueType(0);
9149
9150   // fold (abs c1) -> c2
9151   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9152     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9153   // fold (abs (abs x)) -> (abs x)
9154   if (N0.getOpcode() == ISD::ABS)
9155     return N0;
9156   // fold (abs x) -> x iff not-negative
9157   if (DAG.SignBitIsZero(N0))
9158     return N0;
9159
9160   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9161     return ABD;
9162
9163   return SDValue();
9164 }
9165
9166 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9167   SDValue N0 = N->getOperand(0);
9168   EVT VT = N->getValueType(0);
9169
9170   // fold (bswap c1) -> c2
9171   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9172     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9173   // fold (bswap (bswap x)) -> x
9174   if (N0.getOpcode() == ISD::BSWAP)
9175     return N0->getOperand(0);
9176   return SDValue();
9177 }
9178
9179 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9180   SDValue N0 = N->getOperand(0);
9181   EVT VT = N->getValueType(0);
9182
9183   // fold (bitreverse c1) -> c2
9184   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9185     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9186   // fold (bitreverse (bitreverse x)) -> x
9187   if (N0.getOpcode() == ISD::BITREVERSE)
9188     return N0.getOperand(0);
9189   return SDValue();
9190 }
9191
9192 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9193   SDValue N0 = N->getOperand(0);
9194   EVT VT = N->getValueType(0);
9195
9196   // fold (ctlz c1) -> c2
9197   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9198     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9199
9200   // If the value is known never to be zero, switch to the undef version.
9201   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9202     if (DAG.isKnownNeverZero(N0))
9203       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9204   }
9205
9206   return SDValue();
9207 }
9208
9209 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9210   SDValue N0 = N->getOperand(0);
9211   EVT VT = N->getValueType(0);
9212
9213   // fold (ctlz_zero_undef c1) -> c2
9214   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9215     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9216   return SDValue();
9217 }
9218
9219 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9220   SDValue N0 = N->getOperand(0);
9221   EVT VT = N->getValueType(0);
9222
9223   // fold (cttz c1) -> c2
9224   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9225     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9226
9227   // If the value is known never to be zero, switch to the undef version.
9228   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9229     if (DAG.isKnownNeverZero(N0))
9230       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9231   }
9232
9233   return SDValue();
9234 }
9235
9236 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9237   SDValue N0 = N->getOperand(0);
9238   EVT VT = N->getValueType(0);
9239
9240   // fold (cttz_zero_undef c1) -> c2
9241   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9242     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9243   return SDValue();
9244 }
9245
9246 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9247   SDValue N0 = N->getOperand(0);
9248   EVT VT = N->getValueType(0);
9249
9250   // fold (ctpop c1) -> c2
9251   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9252     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9253   return SDValue();
9254 }
9255
9256 // FIXME: This should be checking for no signed zeros on individual operands, as
9257 // well as no nans.
9258 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9259                                          SDValue RHS,
9260                                          const TargetLowering &TLI) {
9261   const TargetOptions &Options = DAG.getTarget().Options;
9262   EVT VT = LHS.getValueType();
9263
9264   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9265          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9266          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9267 }
9268
9269 /// Generate Min/Max node
9270 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9271                                    SDValue RHS, SDValue True, SDValue False,
9272                                    ISD::CondCode CC, const TargetLowering &TLI,
9273                                    SelectionDAG &DAG) {
9274   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9275     return SDValue();
9276
9277   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9278   switch (CC) {
9279   case ISD::SETOLT:
9280   case ISD::SETOLE:
9281   case ISD::SETLT:
9282   case ISD::SETLE:
9283   case ISD::SETULT:
9284   case ISD::SETULE: {
9285     // Since it's known never nan to get here already, either fminnum or
9286     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
9287     // expanded in terms of it.
9288     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9289     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9290       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9291
9292     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9293     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9294       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9295     return SDValue();
9296   }
9297   case ISD::SETOGT:
9298   case ISD::SETOGE:
9299   case ISD::SETGT:
9300   case ISD::SETGE:
9301   case ISD::SETUGT:
9302   case ISD::SETUGE: {
9303     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9304     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9305       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9306
9307     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9308     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9309       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9310     return SDValue();
9311   }
9312   default:
9313     return SDValue();
9314   }
9315 }
9316
9317 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9318 /// the condition operand sign-bit across the value width and use it as a mask.
9319 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9320   SDValue Cond = N->getOperand(0);
9321   SDValue C1 = N->getOperand(1);
9322   SDValue C2 = N->getOperand(2);
9323   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9324     return SDValue();
9325
9326   EVT VT = N->getValueType(0);
9327   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9328       VT != Cond.getOperand(0).getValueType())
9329     return SDValue();
9330
9331   // The inverted-condition + commuted-select variants of these patterns are
9332   // canonicalized to these forms in IR.
9333   SDValue X = Cond.getOperand(0);
9334   SDValue CondC = Cond.getOperand(1);
9335   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9336   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9337       isAllOnesOrAllOnesSplat(C2)) {
9338     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9339     SDLoc DL(N);
9340     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9341     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9342     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9343   }
9344   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9345     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9346     SDLoc DL(N);
9347     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9348     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9349     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9350   }
9351   return SDValue();
9352 }
9353
9354 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9355   SDValue Cond = N->getOperand(0);
9356   SDValue N1 = N->getOperand(1);
9357   SDValue N2 = N->getOperand(2);
9358   EVT VT = N->getValueType(0);
9359   EVT CondVT = Cond.getValueType();
9360   SDLoc DL(N);
9361
9362   if (!VT.isInteger())
9363     return SDValue();
9364
9365   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9366   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9367   if (!C1 || !C2)
9368     return SDValue();
9369
9370   // Only do this before legalization to avoid conflicting with target-specific
9371   // transforms in the other direction (create a select from a zext/sext). There
9372   // is also a target-independent combine here in DAGCombiner in the other
9373   // direction for (select Cond, -1, 0) when the condition is not i1.
9374   if (CondVT == MVT::i1 && !LegalOperations) {
9375     if (C1->isNullValue() && C2->isOne()) {
9376       // select Cond, 0, 1 --> zext (!Cond)
9377       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9378       if (VT != MVT::i1)
9379         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9380       return NotCond;
9381     }
9382     if (C1->isNullValue() && C2->isAllOnesValue()) {
9383       // select Cond, 0, -1 --> sext (!Cond)
9384       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9385       if (VT != MVT::i1)
9386         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9387       return NotCond;
9388     }
9389     if (C1->isOne() && C2->isNullValue()) {
9390       // select Cond, 1, 0 --> zext (Cond)
9391       if (VT != MVT::i1)
9392         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9393       return Cond;
9394     }
9395     if (C1->isAllOnesValue() && C2->isNullValue()) {
9396       // select Cond, -1, 0 --> sext (Cond)
9397       if (VT != MVT::i1)
9398         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9399       return Cond;
9400     }
9401
9402     // Use a target hook because some targets may prefer to transform in the
9403     // other direction.
9404     if (TLI.convertSelectOfConstantsToMath(VT)) {
9405       // For any constants that differ by 1, we can transform the select into an
9406       // extend and add.
9407       const APInt &C1Val = C1->getAPIntValue();
9408       const APInt &C2Val = C2->getAPIntValue();
9409       if (C1Val - 1 == C2Val) {
9410         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9411         if (VT != MVT::i1)
9412           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9413         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9414       }
9415       if (C1Val + 1 == C2Val) {
9416         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9417         if (VT != MVT::i1)
9418           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9419         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9420       }
9421
9422       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9423       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9424         if (VT != MVT::i1)
9425           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9426         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9427         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9428       }
9429
9430       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9431         return V;
9432     }
9433
9434     return SDValue();
9435   }
9436
9437   // fold (select Cond, 0, 1) -> (xor Cond, 1)
9438   // We can't do this reliably if integer based booleans have different contents
9439   // to floating point based booleans. This is because we can't tell whether we
9440   // have an integer-based boolean or a floating-point-based boolean unless we
9441   // can find the SETCC that produced it and inspect its operands. This is
9442   // fairly easy if C is the SETCC node, but it can potentially be
9443   // undiscoverable (or not reasonably discoverable). For example, it could be
9444   // in another basic block or it could require searching a complicated
9445   // expression.
9446   if (CondVT.isInteger() &&
9447       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9448           TargetLowering::ZeroOrOneBooleanContent &&
9449       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9450           TargetLowering::ZeroOrOneBooleanContent &&
9451       C1->isNullValue() && C2->isOne()) {
9452     SDValue NotCond =
9453         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9454     if (VT.bitsEq(CondVT))
9455       return NotCond;
9456     return DAG.getZExtOrTrunc(NotCond, DL, VT);
9457   }
9458
9459   return SDValue();
9460 }
9461
9462 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
9463   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
9464          "Expected a (v)select");
9465   SDValue Cond = N->getOperand(0);
9466   SDValue T = N->getOperand(1), F = N->getOperand(2);
9467   EVT VT = N->getValueType(0);
9468   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
9469     return SDValue();
9470
9471   // select Cond, Cond, F --> or Cond, F
9472   // select Cond, 1, F    --> or Cond, F
9473   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
9474     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
9475
9476   // select Cond, T, Cond --> and Cond, T
9477   // select Cond, T, 0    --> and Cond, T
9478   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
9479     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
9480
9481   // select Cond, T, 1 --> or (not Cond), T
9482   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
9483     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9484     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
9485   }
9486
9487   // select Cond, 0, F --> and (not Cond), F
9488   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
9489     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
9490     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
9491   }
9492
9493   return SDValue();
9494 }
9495
9496 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9497   SDValue N0 = N->getOperand(0);
9498   SDValue N1 = N->getOperand(1);
9499   SDValue N2 = N->getOperand(2);
9500   EVT VT = N->getValueType(0);
9501   EVT VT0 = N0.getValueType();
9502   SDLoc DL(N);
9503   SDNodeFlags Flags = N->getFlags();
9504
9505   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9506     return V;
9507
9508   if (SDValue V = foldSelectOfConstants(N))
9509     return V;
9510
9511   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9512     return V;
9513
9514   // If we can fold this based on the true/false value, do so.
9515   if (SimplifySelectOps(N, N1, N2))
9516     return SDValue(N, 0); // Don't revisit N.
9517
9518   if (VT0 == MVT::i1) {
9519     // The code in this block deals with the following 2 equivalences:
9520     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9521     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
9522     // The target can specify its preferred form with the
9523     // shouldNormalizeToSelectSequence() callback. However we always transform
9524     // to the right anyway if we find the inner select exists in the DAG anyway
9525     // and we always transform to the left side if we know that we can further
9526     // optimize the combination of the conditions.
9527     bool normalizeToSequence =
9528         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9529     // select (and Cond0, Cond1), X, Y
9530     //   -> select Cond0, (select Cond1, X, Y), Y
9531     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9532       SDValue Cond0 = N0->getOperand(0);
9533       SDValue Cond1 = N0->getOperand(1);
9534       SDValue InnerSelect =
9535           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9536       if (normalizeToSequence || !InnerSelect.use_empty())
9537         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9538                            InnerSelect, N2, Flags);
9539       // Cleanup on failure.
9540       if (InnerSelect.use_empty())
9541         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9542     }
9543     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9544     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9545       SDValue Cond0 = N0->getOperand(0);
9546       SDValue Cond1 = N0->getOperand(1);
9547       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9548                                         Cond1, N1, N2, Flags);
9549       if (normalizeToSequence || !InnerSelect.use_empty())
9550         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9551                            InnerSelect, Flags);
9552       // Cleanup on failure.
9553       if (InnerSelect.use_empty())
9554         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9555     }
9556
9557     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9558     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9559       SDValue N1_0 = N1->getOperand(0);
9560       SDValue N1_1 = N1->getOperand(1);
9561       SDValue N1_2 = N1->getOperand(2);
9562       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9563         // Create the actual and node if we can generate good code for it.
9564         if (!normalizeToSequence) {
9565           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9566           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9567                              N2, Flags);
9568         }
9569         // Otherwise see if we can optimize the "and" to a better pattern.
9570         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9571           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9572                              N2, Flags);
9573         }
9574       }
9575     }
9576     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9577     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9578       SDValue N2_0 = N2->getOperand(0);
9579       SDValue N2_1 = N2->getOperand(1);
9580       SDValue N2_2 = N2->getOperand(2);
9581       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9582         // Create the actual or node if we can generate good code for it.
9583         if (!normalizeToSequence) {
9584           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9585           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9586                              N2_2, Flags);
9587         }
9588         // Otherwise see if we can optimize to a better pattern.
9589         if (SDValue Combined = visitORLike(N0, N2_0, N))
9590           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9591                              N2_2, Flags);
9592       }
9593     }
9594   }
9595
9596   // select (not Cond), N1, N2 -> select Cond, N2, N1
9597   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9598     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9599     SelectOp->setFlags(Flags);
9600     return SelectOp;
9601   }
9602
9603   // Fold selects based on a setcc into other things, such as min/max/abs.
9604   if (N0.getOpcode() == ISD::SETCC) {
9605     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9606     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9607
9608     // select (fcmp lt x, y), x, y -> fminnum x, y
9609     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9610     //
9611     // This is OK if we don't care what happens if either operand is a NaN.
9612     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9613       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9614                                                 CC, TLI, DAG))
9615         return FMinMax;
9616
9617     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9618     // This is conservatively limited to pre-legal-operations to give targets
9619     // a chance to reverse the transform if they want to do that. Also, it is
9620     // unlikely that the pattern would be formed late, so it's probably not
9621     // worth going through the other checks.
9622     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9623         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9624         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9625       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9626       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9627       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9628         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9629         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9630         //
9631         // The IR equivalent of this transform would have this form:
9632         //   %a = add %x, C
9633         //   %c = icmp ugt %x, ~C
9634         //   %r = select %c, -1, %a
9635         //   =>
9636         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9637         //   %u0 = extractvalue %u, 0
9638         //   %u1 = extractvalue %u, 1
9639         //   %r = select %u1, -1, %u0
9640         SDVTList VTs = DAG.getVTList(VT, VT0);
9641         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9642         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9643       }
9644     }
9645
9646     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9647         (!LegalOperations &&
9648          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9649       // Any flags available in a select/setcc fold will be on the setcc as they
9650       // migrated from fcmp
9651       Flags = N0.getNode()->getFlags();
9652       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9653                                        N2, N0.getOperand(2));
9654       SelectNode->setFlags(Flags);
9655       return SelectNode;
9656     }
9657
9658     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
9659       return NewSel;
9660   }
9661
9662   if (!VT.isVector())
9663     if (SDValue BinOp = foldSelectOfBinops(N))
9664       return BinOp;
9665
9666   return SDValue();
9667 }
9668
9669 // Folds a vselect of two binary CONCAT_VECTORS whose constant BUILD_VECTOR
9670 // condition is uniform within each half into a single CONCAT_VECTORS.
9671 // The caller guarantees the operand opcodes (asserted below); condition lanes
9672 // are ConstantSDNodes or undef, and neither half may be entirely undef.
9673 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9674   SDLoc DL(N);
9675   SDValue Cond = N->getOperand(0);
9676   SDValue LHS = N->getOperand(1);
9677   SDValue RHS = N->getOperand(2);
9678   EVT VT = N->getValueType(0);
9679   int NumElems = VT.getVectorNumElements();
9680   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9681          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9682          Cond.getOpcode() == ISD::BUILD_VECTOR);
9683
9684   // Wider concatenations are not handled; only the two-operand form is.
9685   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9686     return SDValue();
9687
9688   // Walks condition lanes [Begin, End), ignoring undef lanes. The first
9689   // non-undef lane becomes the representative Rep; returns false as soon as a
9690   // later non-undef lane is a different node than Rep. The vselect operands
9691   // being CONCAT_VECTORS guarantees that NumElems is even.
9692   auto ScanHalf = [&Cond](int Begin, int End, ConstantSDNode *&Rep) {
9693     for (int Lane = Begin; Lane < End; ++Lane) {
9694       SDValue Elt = Cond.getOperand(Lane);
9695       if (Elt->isUndef())
9696         continue;
9697       if (!Rep)
9698         Rep = cast<ConstantSDNode>(Elt);
9699       else if (Elt.getNode() != Rep)
9700         return false;
9701     }
9702     return true;
9703   };
9704
9705   const int Half = NumElems / 2;
9706   ConstantSDNode *BottomHalf = nullptr;
9707   if (!ScanHalf(0, Half, BottomHalf))
9708     return SDValue();
9709
9710   ConstantSDNode *TopHalf = nullptr;
9711   if (!ScanHalf(Half, NumElems, TopHalf))
9712     return SDValue();
9713
9714   assert(TopHalf && BottomHalf &&
9715          "One half of the selector was all UNDEFs and the other was all the "
9716          "same value. This should have been addressed before this function.");
9717   // A zero representative selects the RHS half, anything else the LHS half.
9718   SDValue Lo = BottomHalf->isNullValue() ? RHS->getOperand(0)
9719                                          : LHS->getOperand(0);
9720   SDValue Hi = TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1);
9721   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
9722 }
9723
9724 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9725   // With a null base and Index = add(splat(S), Rest), rewrite to base S and
9726   // index Rest. Only the first operand of the add is inspected for a splat.
9727   if (isNullConstant(BasePtr) && Index.getOpcode() == ISD::ADD) {
9728     if (SDValue Uniform = DAG.getSplatValue(Index.getOperand(0))) {
9729       BasePtr = Uniform;
9730       Index = Index.getOperand(1);
9731       return true;
9732     }
9733   }
9734
9735   // Nothing matched; BasePtr and Index are left untouched.
9736   return false;
9737 }
9738
9739 // Try to drop a zext/sext feeding a gather/scatter index. Seeing such an
9740 // extension always updates MGS's index type to the matching signedness; the
9741 // extension itself is only removed (and true returned) if the target agrees.
9742 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9743                      bool Scaled, SelectionDAG &DAG) {
9744   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9745
9746   const unsigned Opc = Index.getOpcode();
9747   const bool IsZExt = Opc == ISD::ZERO_EXTEND;
9748   if (!IsZExt && Opc != ISD::SIGN_EXTEND)
9749     return false;
9750
9751   // Record the signedness implied by the extension on the node itself.
9752   SDValue Narrow = Index.getOperand(0);
9753   if (IsZExt)
9754     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9755   else
9756     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9757
9758   // The target decides whether the narrower index type is usable directly.
9759   if (!TLI.shouldRemoveExtendFromGSIndex(Narrow.getValueType()))
9760     return false;
9761   Index = Narrow;
9762   return true;
9763 }
9764
9765 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9766   // Combines for masked scatter: drop it when the mask is all zeros, otherwise
9767   // rebuild it if the base pointer or the index can be simplified.
9768   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9769   SDValue Mask = MSC->getMask();
9770   SDValue Chain = MSC->getChain();
9771
9772   // A scatter with an all-zeros mask is replaced by its chain.
9773   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9774     return Chain;
9775
9776   SDValue BasePtr = MSC->getBasePtr();
9777   SDValue Index = MSC->getIndex();
9778
9779   // First try to pull a splatted base out of the index; only if that fails,
9780   // try to strip an extension from the index. Either success produces the
9781   // same kind of replacement node, built from the refined operands.
9782   const bool Refined =
9783       refineUniformBase(BasePtr, Index, DAG) ||
9784       refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG);
9785   if (!Refined)
9786     return SDValue();
9787
9788   SDLoc DL(N);
9789   SDValue Ops[] = {Chain,   MSC->getValue(), Mask,
9790                    BasePtr, Index,           MSC->getScale()};
9791   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
9792                               Ops, MSC->getMemOperand(), MSC->getIndexType(),
9793                               MSC->isTruncatingStore());
9794 }
9795
9796 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9797   // Combines for masked store: remove it for an all-zeros mask, turn it into a
9798   // plain store for an all-ones mask, or try to form an indexed store.
9799   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9800   SDValue Mask = MST->getMask();
9801   SDValue Chain = MST->getChain();
9802
9803   // A masked store with an all-zeros mask is replaced by its chain.
9804   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9805     return Chain;
9806
9807   // A masked store with an all-ones mask can use an unmasked store.
9808   // FIXME: Can we do this for indexed, compressing, or truncating stores?
9809   const bool IsPlainStore = MST->isUnindexed() && !MST->isCompressingStore() &&
9810                             !MST->isTruncatingStore();
9811   if (IsPlainStore && ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
9812     return DAG.getStore(Chain, SDLoc(N), MST->getValue(), MST->getBasePtr(),
9813                         MST->getMemOperand());
9814
9815   // Try transforming N to an indexed store.
9816   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9817     return SDValue(N, 0);
9818   return SDValue();
9819 }
9820
9821 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9822   // Combines for masked gather: fold to the pass-through value when the mask is
9823   // all zeros, otherwise rebuild it if the base pointer or index simplifies.
9824   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9825   SDValue Mask = MGT->getMask();
9826   SDValue Chain = MGT->getChain();
9827   SDValue PassThru = MGT->getPassThru();
9828
9829   // All-zeros mask: the result is PassThru and the chain is forwarded.
9830   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9831     return CombineTo(N, PassThru, Chain);
9832
9833   SDValue BasePtr = MGT->getBasePtr();
9834   SDValue Index = MGT->getIndex();
9835
9836   // First try to pull a splatted base out of the index; only if that fails,
9837   // try to strip an extension from the index. Either success produces the
9838   // same kind of replacement node, built from the refined operands.
9839   const bool Refined =
9840       refineUniformBase(BasePtr, Index, DAG) ||
9841       refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG);
9842   if (!Refined)
9843     return SDValue();
9844
9845   SDLoc DL(N);
9846   SDValue Ops[] = {Chain,   PassThru, Mask,
9847                    BasePtr, Index,    MGT->getScale()};
9848   return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9849                              MGT->getMemoryVT(), DL, Ops,
9850                              MGT->getMemOperand(), MGT->getIndexType(),
9851                              MGT->getExtensionType());
9852 }
9853
9854 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9855   // Combines for masked load: fold to the pass-through value for an all-zeros
9856   // mask, use a plain load for an all-ones mask, or try to make it indexed.
9857   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9858   SDValue Mask = MLD->getMask();
9859
9860   // All-zeros mask: the result is the pass-through value.
9861   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
9862     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9863
9864   // A masked load with an all-ones mask can use an unmasked load.
9865   // FIXME: Can we do this for indexed, expanding, or extending loads?
9866   const bool IsPlainLoad = MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9867                            MLD->getExtensionType() == ISD::NON_EXTLOAD;
9868   if (IsPlainLoad && ISD::isConstantSplatVectorAllOnes(Mask.getNode())) {
9869     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9870                                 MLD->getBasePtr(), MLD->getMemOperand());
9871     return CombineTo(N, NewLd, NewLd.getValue(1));
9872   }
9873
9874   // Try transforming N to an indexed load.
9875   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9876     return SDValue(N, 0);
9877   return SDValue();
9878 }
9879
9880 /// A vector select of 2 constant vectors can be simplified to math/logic to
9881 /// avoid a variable select instruction and possibly avoid constant loads.
9882 /// Returns the folded value, or an empty SDValue when no fold applies.
9883 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9884   SDValue Cond = N->getOperand(0);
9885   SDValue N1 = N->getOperand(1);
9886   SDValue N2 = N->getOperand(2);
9887   EVT VT = N->getValueType(0);
9888   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9889       !TLI.convertSelectOfConstantsToMath(VT) ||
9890       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9891       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9892     return SDValue();
9893
9894   // Check if the condition value can increment/decrement a single constant,
9895   // which turns the select into an add and avoids a constant load.
9896   bool AllAddOne = true, AllSubOne = true;
9897   unsigned Elts = VT.getVectorNumElements();
9898   for (unsigned i = 0; i != Elts; ++i) {
9899     SDValue N1Elt = N1.getOperand(i);
9900     SDValue N2Elt = N2.getOperand(i);
9901     if (N1Elt.isUndef())
9902       continue; // An undef true-value lane may take any value.
9903     // N2 is reused as the addend, so an undef N2 lane would produce undef where
9904     // the select yields N1's constant. Mismatched lane types are rejected too.
9905     if (N2Elt.isUndef() || N1Elt.getValueType() != N2Elt.getValueType()) {
9906       AllAddOne = AllSubOne = false;
9907       break;
9908     }
9909     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9910     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9911     if (C1 != C2 + 1)
9912       AllAddOne = false;
9913     if (C1 != C2 - 1)
9914       AllSubOne = false;
9915   }
9916
9917   // Further simplifications for the extra-special cases where the constants are
9918   // all 0 or all -1 should be implemented as folds of these patterns.
9919   SDLoc DL(N);
9920   if (AllAddOne || AllSubOne) {
9921     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9922     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9923     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9924     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9925     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9926   }
9927
9928   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9929   APInt Pow2C;
9930   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9931       isNullOrNullSplat(N2)) {
9932     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9933     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9934     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9935   }
9936   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9937     return V;
9938
9939   // General case: vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond),
9940   // (C1^C2)), C2. That only makes sense if a vselect is slower than 2 logic
9941   // ops, so leave that to a machine-specific pass.
9942   return SDValue();
9943 }
9944
9945 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
       // Combine a VSELECT node. N0 is the condition; N1 and N2 are the values
       // selected for an all-ones and an all-zeros condition respectively.
       // Returns the replacement value, SDValue(N, 0) when SimplifySelectOps
       // reports success, or an empty SDValue when no fold applies.
9946   SDValue N0 = N->getOperand(0);
9947   SDValue N1 = N->getOperand(1);
9948   SDValue N2 = N->getOperand(2);
9949   EVT VT = N->getValueType(0);
9950   SDLoc DL(N);
9951
9952   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9953     return V;
9954
9955   if (SDValue V = foldBoolSelectToLogic(N, DAG))
9956     return V;
9957
9958   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9959   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9960     return DAG.getSelect(DL, VT, F, N2, N1);
9961
9962   // Folds that need a SETCC condition. First: canonicalize integer abs.
9963   // vselect (setg[te] X,  0),  X, -X ->
9964   // vselect (setgt    X, -1),  X, -X ->
9965   // vselect (setl[te] X,  0), -X,  X ->
9966   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9967   if (N0.getOpcode() == ISD::SETCC) {
9968     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9969     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9970     bool isAbs = false;
9971     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9972
         // The negated arm must be spelled (sub all-zeros, X); isAbs records the
         // result of that last check.
9973     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9974          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9975         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9976       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9977     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9978              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9979       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9980
9981     if (isAbs) {
9982       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9983         return DAG.getNode(ISD::ABS, DL, VT, LHS);
9984
           // ABS is not legal or custom for VT: emit the sra/add/xor sequence.
9985       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9986                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
9987                                                   DL, getShiftAmountTy(VT)));
9988       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9989       AddToWorklist(Shift.getNode());
9990       AddToWorklist(Add.getNode());
9991       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9992     }
9993
9994     // vselect (fcmp lt x, y), x, y -> fminnum x, y
9995     // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
9996     //
9997     // This is OK if we don't care about what happens if either operand is a
9998     // NaN.
9999     //
10000     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10001       if (SDValue FMinMax =
10002               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10003         return FMinMax;
10004     }
10005
10006     // If this select has a condition (setcc) with narrower operands than the
10007     // select, try to widen the compare to match the select width.
10008     // TODO: This should be extended to handle any constant.
10009     // TODO: This could be extended to handle non-loading patterns, but that
10010     //       requires thorough testing to avoid regressions.
10011     if (isNullOrNullSplat(RHS)) {
10012       EVT NarrowVT = LHS.getValueType();
10013       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10014       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10015       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10016       unsigned WideWidth = WideVT.getScalarSizeInBits();
10017       bool IsSigned = isSignedIntSetCC(CC);
10018       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10019       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10020           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10021           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10022           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10023         // Both compare operands can be widened for free. The LHS can use an
10024         // extended load, and the RHS is a constant:
10025         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10026         //   vselect (setcc extload(X), C'), N1, N2
10027         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10028         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10029         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10030         EVT WideSetCCVT = getSetCCResultType(WideVT);
10031         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10032         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10033       }
10034     }
10035
10036     // Match VSELECTs into add with unsigned saturation.
10037     if (hasOperation(ISD::UADDSAT, VT)) {
10038       // Check if one of the arms of the VSELECT is vector with all bits set.
10039       // If it's on the left side invert the predicate to simplify logic below.
10040       SDValue Other;
10041       ISD::CondCode SatCC = CC;
10042       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10043         Other = N2;
10044         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10045       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10046         Other = N1;
10047       }
10048
10049       if (Other && Other.getOpcode() == ISD::ADD) {
10050         SDValue CondLHS = LHS, CondRHS = RHS;
10051         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10052
10053         // Canonicalize condition operands.
10054         if (SatCC == ISD::SETUGE) {
10055           std::swap(CondLHS, CondRHS);
10056           SatCC = ISD::SETULE;
10057         }
10058
10059         // We can test against either of the addition operands.
10060         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10061         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10062         if (SatCC == ISD::SETULE && Other == CondRHS &&
10063             (OpLHS == CondLHS || OpRHS == CondLHS))
10064           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10065
10066         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10067             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10068              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10069             CondLHS == OpLHS) {
10070           // If the RHS is a constant we have to reverse the const
10071           // canonicalization.
10072           // x <= ~C ? x+C : ~0 --> uaddsat x, C
10073           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10074             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10075           };
10076           if (SatCC == ISD::SETULE &&
10077               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10078             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10079         }
10080       }
10081     }
10082
10083     // Match VSELECTs into sub with unsigned saturation.
10084     if (hasOperation(ISD::USUBSAT, VT)) {
10085       // Check if one of the arms of the VSELECT is a zero vector. If it's on
10086       // the left side invert the predicate to simplify logic below.
10087       SDValue Other;
10088       ISD::CondCode SatCC = CC;
10089       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10090         Other = N2;
10091         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10092       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10093         Other = N1;
10094       }
10095
10096       if (Other && Other.getNumOperands() == 2) {
10097         SDValue CondRHS = RHS;
10098         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10099
10100         if (Other.getOpcode() == ISD::SUB &&
10101             LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10102             OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10103           // Look for a general sub with unsigned saturation first.
10104           // zext(x) >= y ? x - trunc(y) : 0
10105           // --> usubsat(x,trunc(umin(y,SatLimit)))
10106           // zext(x) >  y ? x - trunc(y) : 0
10107           // --> usubsat(x,trunc(umin(y,SatLimit)))
10108           if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10109             return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10110                                        DL);
10111         }
10112
10113         if (OpLHS == LHS) {
10114           // Look for a general sub with unsigned saturation first.
10115           // x >= y ? x-y : 0 --> usubsat x, y
10116           // x >  y ? x-y : 0 --> usubsat x, y
10117           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10118               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10119             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10120
10121           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10122               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10123             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10124                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10125               // If the RHS is a constant we have to reverse the const
10126               // canonicalization.
10127               // x > C-1 ? x+-C : 0 --> usubsat x, C
10128               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10129                 return (!Op && !Cond) ||
10130                        (Op && Cond &&
10131                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10132               };
10133               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10134                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10135                                             /*AllowUndefs*/ true)) {
10136                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10137                                     DAG.getConstant(0, DL, VT), OpRHS);
10138                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10139               }
10140
10141               // Another special case: If C was a sign bit, the sub has been
10142               // canonicalized into a xor.
10143               // FIXME: Would it be better to use computeKnownBits to determine
10144               //        whether it's safe to decanonicalize the xor?
10145               // x s< 0 ? x^C : 0 --> usubsat x, C
10146               APInt SplatValue;
10147               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10148                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10149                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10150                   SplatValue.isSignMask()) {
10151                 // Note that we have to rebuild the RHS constant here to
10152                 // ensure we don't rely on particular values of undef lanes.
10153                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10154                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10155               }
10156             }
10157           }
10158         }
10159       }
10160     }
10161   }
10162
10163   if (SimplifySelectOps(N, N1, N2))
10164     return SDValue(N, 0);  // Don't revisit N.
10165
10166   // Fold (vselect all_ones, N1, N2) -> N1
10167   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10168     return N1;
10169   // Fold (vselect all_zeros, N1, N2) -> N2
10170   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10171     return N2;
10172
10173   // The ConvertSelectToConcatVector function is assuming both the above
10174   // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
10175   // and addressed.
10176   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10177       N2.getOpcode() == ISD::CONCAT_VECTORS &&
10178       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10179     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10180       return CV;
10181   }
10182
10183   if (SDValue V = foldVSelectOfConstants(N))
10184     return V;
10185
10186   return SDValue();
10187 }
10188
10189 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10190   // Operands: compare LHS, compare RHS, true value, false value, cond code.
10191   SDValue CmpLHS = N->getOperand(0);
10192   SDValue CmpRHS = N->getOperand(1);
10193   SDValue TrueV = N->getOperand(2);
10194   SDValue FalseV = N->getOperand(3);
10195   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
10196   SDLoc DL(N);
10197
10198   // select_cc lhs, rhs, x, x, cc -> x
10199   if (TrueV == FalseV)
10200     return TrueV;
10201
10202   // See whether the comparison itself simplifies.
10203   EVT SetCCVT = getSetCCResultType(CmpLHS.getValueType());
10204   if (SDValue SCC = SimplifySetCC(SetCCVT, CmpLHS, CmpRHS, CC, DL, false)) {
10205     AddToWorklist(SCC.getNode());
10206     // A constant condition picks one arm outright.
10207     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
10208       return SCCC->isNullValue() ? FalseV : TrueV;
10209
10210     // When the condition is UNDEF, just return the first value operand. This
10211     // is coherent with DAG creation: no setcc node is created in this case.
10212     if (SCC->isUndef())
10213       return TrueV;
10214
10215     // Otherwise rebuild around a simpler setcc, keeping its flags.
10216     if (SCC.getOpcode() == ISD::SETCC) {
10217       SDValue SelectOp = DAG.getNode(ISD::SELECT_CC, DL, TrueV.getValueType(),
10218                                      SCC.getOperand(0), SCC.getOperand(1),
10219                                      TrueV, FalseV, SCC.getOperand(2));
10220       SelectOp->setFlags(SCC->getFlags());
10221       return SelectOp;
10222     }
10223   }
10224
10225   // If we can fold this based on the true/false value, do so.
10226   if (SimplifySelectOps(N, TrueV, FalseV))
10227     return SDValue(N, 0); // Don't revisit N.
10228
10229   // Otherwise fold select_cc into other things, such as min/max/abs.
10230   return SimplifySelectCC(DL, CmpLHS, CmpRHS, TrueV, FalseV, CC);
10231 }
10232
10233 SDValue DAGCombiner::visitSETCC(SDNode *N) {
10234   // A setcc feeding a brcond is worth keeping as a setcc: that pattern enables
10235   // many further combines, so simplifications below try hard to preserve it.
10236   const bool PreferSetCC =
10237       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10238
10239   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10240   EVT VT = N->getValueType(0);
10241   SDLoc DL(N);
10242
10243   //   SETCC(FREEZE(X), CONST, Cond)
10244   // =>
10245   //   FREEZE(SETCC(X, CONST, Cond))
10246   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10247   // isn't equivalent to true or false: a comparison that is always true or
10248   // always false must not become dependent on a possibly-poison X.
10249   //
10250   // This transformation is beneficial because visitBRCOND can fold
10251   // BRCOND(FREEZE(X)) to BRCOND(X). It is therefore only attempted when the
10252   // setcc is used by a BRCOND, and only integer constants are recognized.
10253   if (PreferSetCC) {
10254     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10255     ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10256     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10257
10258     // Is 'X Cond C' always true or false?
10259     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10260       switch (Cond) {
10261       case ISD::SETULT: // X u<  0: never true.
10262       case ISD::SETUGE: // X u>= 0: always true.
10263         return C->isNullValue();
10264       case ISD::SETLT: // X s<  min-signed: never true.
10265       case ISD::SETGE: // X s>= min-signed: always true.
10266         return C->isMinSignedValue();
10267       case ISD::SETUGT: // X u>  all-ones: never true.
10268       case ISD::SETULE: // X u<= all-ones: always true.
10269         return C->isAllOnesValue();
10270       case ISD::SETGT: // X s>  max-signed: never true.
10271       case ISD::SETLE: // X s<= max-signed: always true.
10272         return C->isMaxSignedValue();
10273       default:
10274         return false;
10275       }
10276     };
10277
10278     // Drop the freeze from whichever operand is compared against a constant.
10279     bool Updated = false;
10280     if (N1C && N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() &&
10281         !IsAlwaysTrueOrFalse(Cond, N1C)) {
10282       N0 = N0->getOperand(0);
10283       Updated = true;
10284     }
10285     // With the constant on the left, test the operand-swapped predicate.
10286     if (N0C && N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() &&
10287         !IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), N0C)) {
10288       N1 = N1->getOperand(0);
10289       Updated = true;
10290     }
10291
10292     if (Updated)
10293       return DAG.getFreeze(DAG.getSetCC(DL, VT, N0, N1, Cond));
10294   }
10295
10296   SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10297                                    DL, !PreferSetCC);
10298   if (!Combined)
10299     return SDValue();
10300
10301   // Anything other than "a brcond user got a non-setcc" is returned as is.
10302   if (!PreferSetCC || Combined.getOpcode() == ISD::SETCC)
10303     return Combined;
10304
10305   // Try our best to recreate a setcc from the simplified value.
10306   SDValue NewSetCC = rebuildSetCC(Combined);
10307
10308   // We don't have anything interesting to combine to.
10309   if (NewSetCC.getNode() == N)
10310     return SDValue();
10311
10312   // Fall back to the simplified value if no setcc could be rebuilt.
10313   return NewSetCC ? NewSetCC : Combined;
10314 }
10315
10316 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10317   // Operands are (LHS, RHS, Carry, Cond). Only a constant-zero carry is
10318   // handled: the node is then rebuilt as a regular SETCC on the same operands.
10319   SDValue CarryIn = N->getOperand(2);
10320   if (!isNullConstant(CarryIn))
10321     return SDValue();
10322   SDValue CmpLHS = N->getOperand(0);
10323   SDValue CmpRHS = N->getOperand(1);
10324   SDValue CCOp = N->getOperand(3);
10325   return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), CmpLHS, CmpRHS,
10326                      CCOp);
10327 }
10328
10329 /// Check if N satisfies:
10330 ///   N is used once.
10331 ///   N is a Load.
10332 ///   The load is compatible with ExtOpcode. It means
10333 ///     If load has explicit zero/sign extension, ExpOpcode must have the same
10334 ///     extension.
10335 ///     Otherwise returns true.
10336 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
10337   if (!N.hasOneUse())
10338     return false;
10339
10340   if (!isa<LoadSDNode>(N))
10341     return false;
10342
10343   LoadSDNode *Load = cast<LoadSDNode>(N);
10344   ISD::LoadExtType LoadExt = Load->getExtensionType();
10345   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
10346     return true;
10347
10348   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
10349   // extension.
10350   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
10351       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
10352     return false;
10353
10354   return true;
10355 }
10356
10357 /// Fold
10358 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
10359 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
10360 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
10361 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10362 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10363 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
10364                                          SelectionDAG &DAG) {
10365   unsigned Opcode = N->getOpcode();
10366   SDValue N0 = N->getOperand(0);
10367   EVT VT = N->getValueType(0);
10368   SDLoc DL(N);
10369
10370   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10371           Opcode == ISD::ANY_EXTEND) &&
10372          "Expected EXTEND dag node in input!");
10373
10374   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
10375       !N0.hasOneUse())
10376     return SDValue();
10377
10378   SDValue Op1 = N0->getOperand(1);
10379   SDValue Op2 = N0->getOperand(2);
10380   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
10381     return SDValue();
10382
10383   auto ExtLoadOpcode = ISD::EXTLOAD;
10384   if (Opcode == ISD::SIGN_EXTEND)
10385     ExtLoadOpcode = ISD::SEXTLOAD;
10386   else if (Opcode == ISD::ZERO_EXTEND)
10387     ExtLoadOpcode = ISD::ZEXTLOAD;
10388
10389   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
10390   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
10391   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
10392       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
10393     return SDValue();
10394
10395   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
10396   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
10397   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
10398 }
10399
10400 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
10401 /// a build_vector of constants.
10402 /// This function is called by the DAGCombiner when visiting sext/zext/aext
10403 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
10404 /// Vector extends are not folded if operations are legal; this is to
10405 /// avoid introducing illegal build_vector dag nodes.
10406 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
10407                                          SelectionDAG &DAG, bool LegalTypes) {
10408   unsigned Opcode = N->getOpcode();
10409   SDValue N0 = N->getOperand(0);
10410   EVT VT = N->getValueType(0);
10411   SDLoc DL(N);
10412
10413   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
10414          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
10415          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
10416          && "Expected EXTEND dag node in input!");
10417
10418   // fold (sext c1) -> c1
10419   // fold (zext c1) -> c1
10420   // fold (aext c1) -> c1
10421   if (isa<ConstantSDNode>(N0))
10422     return DAG.getNode(Opcode, DL, VT, N0);
10423
10424   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10425   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
10426   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
10427   if (N0->getOpcode() == ISD::SELECT) {
10428     SDValue Op1 = N0->getOperand(1);
10429     SDValue Op2 = N0->getOperand(2);
10430     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
10431         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
10432       // For any_extend, choose sign extension of the constants to allow a
10433       // possible further transform to sign_extend_inreg.i.e.
10434       //
10435       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
10436       // t2: i64 = any_extend t1
10437       // -->
10438       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
10439       // -->
10440       // t4: i64 = sign_extend_inreg t3
10441       unsigned FoldOpc = Opcode;
10442       if (FoldOpc == ISD::ANY_EXTEND)
10443         FoldOpc = ISD::SIGN_EXTEND;
10444       return DAG.getSelect(DL, VT, N0->getOperand(0),
10445                            DAG.getNode(FoldOpc, DL, VT, Op1),
10446                            DAG.getNode(FoldOpc, DL, VT, Op2));
10447     }
10448   }
10449
10450   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
10451   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
10452   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
10453   EVT SVT = VT.getScalarType();
10454   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10455       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10456     return SDValue();
10457
10458   // We can fold this node into a build_vector.
10459   unsigned VTBits = SVT.getSizeInBits();
10460   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10461   SmallVector<SDValue, 8> Elts;
10462   unsigned NumElts = VT.getVectorNumElements();
10463
10464   // For zero-extensions, UNDEF elements still guarantee to have the upper
10465   // bits set to zero.
10466   bool IsZext =
10467       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10468
10469   for (unsigned i = 0; i != NumElts; ++i) {
10470     SDValue Op = N0.getOperand(i);
10471     if (Op.isUndef()) {
10472       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10473       continue;
10474     }
10475
10476     SDLoc DL(Op);
10477     // Get the constant value and if needed trunc it to the size of the type.
10478     // Nodes like build_vector might have constants wider than the scalar type.
10479     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10480     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10481       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10482     else
10483       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10484   }
10485
10486   return DAG.getBuildVector(VT, DL, Elts);
10487 }
10488
10489 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
10490 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
10491 // transformation. Returns true if extension are possible and the above
10492 // mentioned transformation is profitable.
10493 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10494                                     unsigned ExtOpc,
10495                                     SmallVectorImpl<SDNode *> &ExtendNodes,
10496                                     const TargetLowering &TLI) {
10497   bool HasCopyToRegUses = false;
10498   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10499   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10500                             UE = N0.getNode()->use_end();
10501        UI != UE; ++UI) {
10502     SDNode *User = *UI;
10503     if (User == N)
10504       continue;
10505     if (UI.getUse().getResNo() != N0.getResNo())
10506       continue;
10507     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10508     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10509       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10510       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10511         // Sign bits will be lost after a zext.
10512         return false;
10513       bool Add = false;
10514       for (unsigned i = 0; i != 2; ++i) {
10515         SDValue UseOp = User->getOperand(i);
10516         if (UseOp == N0)
10517           continue;
10518         if (!isa<ConstantSDNode>(UseOp))
10519           return false;
10520         Add = true;
10521       }
10522       if (Add)
10523         ExtendNodes.push_back(User);
10524       continue;
10525     }
10526     // If truncates aren't free and there are users we can't
10527     // extend, it isn't worthwhile.
10528     if (!isTruncFree)
10529       return false;
10530     // Remember if this value is live-out.
10531     if (User->getOpcode() == ISD::CopyToReg)
10532       HasCopyToRegUses = true;
10533   }
10534
10535   if (HasCopyToRegUses) {
10536     bool BothLiveOut = false;
10537     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10538          UI != UE; ++UI) {
10539       SDUse &Use = UI.getUse();
10540       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10541         BothLiveOut = true;
10542         break;
10543       }
10544     }
10545     if (BothLiveOut)
10546       // Both unextended and extended values are live out. There had better be
10547       // a good reason for the transformation.
10548       return ExtendNodes.size();
10549   }
10550   return true;
10551 }
10552
10553 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10554                                   SDValue OrigLoad, SDValue ExtLoad,
10555                                   ISD::NodeType ExtType) {
10556   // Extend SetCC uses if necessary.
10557   SDLoc DL(ExtLoad);
10558   for (SDNode *SetCC : SetCCs) {
10559     SmallVector<SDValue, 4> Ops;
10560
10561     for (unsigned j = 0; j != 2; ++j) {
10562       SDValue SOp = SetCC->getOperand(j);
10563       if (SOp == OrigLoad)
10564         Ops.push_back(ExtLoad);
10565       else
10566         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10567     }
10568
10569     Ops.push_back(SetCC->getOperand(2));
10570     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10571   }
10572 }
10573
10574 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10575 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10576   SDValue N0 = N->getOperand(0);
10577   EVT DstVT = N->getValueType(0);
10578   EVT SrcVT = N0.getValueType();
10579
10580   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10581           N->getOpcode() == ISD::ZERO_EXTEND) &&
10582          "Unexpected node type (not an extend)!");
10583
10584   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10585   // For example, on a target with legal v4i32, but illegal v8i32, turn:
10586   //   (v8i32 (sext (v8i16 (load x))))
10587   // into:
10588   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
10589   //                          (v4i32 (sextload (x + 16)))))
10590   // Where uses of the original load, i.e.:
10591   //   (v8i16 (load x))
10592   // are replaced with:
10593   //   (v8i16 (truncate
10594   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
10595   //                            (v4i32 (sextload (x + 16)))))))
10596   //
10597   // This combine is only applicable to illegal, but splittable, vectors.
10598   // All legal types, and illegal non-vector types, are handled elsewhere.
10599   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10600   //
10601   if (N0->getOpcode() != ISD::LOAD)
10602     return SDValue();
10603
10604   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10605
10606   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10607       !N0.hasOneUse() || !LN0->isSimple() ||
10608       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10609       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10610     return SDValue();
10611
10612   SmallVector<SDNode *, 4> SetCCs;
10613   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10614     return SDValue();
10615
10616   ISD::LoadExtType ExtType =
10617       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10618
10619   // Try to split the vector types to get down to legal types.
10620   EVT SplitSrcVT = SrcVT;
10621   EVT SplitDstVT = DstVT;
10622   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10623          SplitSrcVT.getVectorNumElements() > 1) {
10624     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10625     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10626   }
10627
10628   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10629     return SDValue();
10630
10631   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10632
10633   SDLoc DL(N);
10634   const unsigned NumSplits =
10635       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10636   const unsigned Stride = SplitSrcVT.getStoreSize();
10637   SmallVector<SDValue, 4> Loads;
10638   SmallVector<SDValue, 4> Chains;
10639
10640   SDValue BasePtr = LN0->getBasePtr();
10641   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10642     const unsigned Offset = Idx * Stride;
10643     const Align Align = commonAlignment(LN0->getAlign(), Offset);
10644
10645     SDValue SplitLoad = DAG.getExtLoad(
10646         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10647         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10648         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10649
10650     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10651
10652     Loads.push_back(SplitLoad.getValue(0));
10653     Chains.push_back(SplitLoad.getValue(1));
10654   }
10655
10656   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10657   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10658
10659   // Simplify TF.
10660   AddToWorklist(NewChain.getNode());
10661
10662   CombineTo(N, NewValue);
10663
10664   // Replace uses of the original load (before extension)
10665   // with a truncate of the concatenated sextloaded vectors.
10666   SDValue Trunc =
10667       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10668   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10669   CombineTo(N0.getNode(), Trunc, NewChain);
10670   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10671 }
10672
10673 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10674 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10675 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10676   assert(N->getOpcode() == ISD::ZERO_EXTEND);
10677   EVT VT = N->getValueType(0);
10678   EVT OrigVT = N->getOperand(0).getValueType();
10679   if (TLI.isZExtFree(OrigVT, VT))
10680     return SDValue();
10681
10682   // and/or/xor
10683   SDValue N0 = N->getOperand(0);
10684   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10685         N0.getOpcode() == ISD::XOR) ||
10686       N0.getOperand(1).getOpcode() != ISD::Constant ||
10687       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10688     return SDValue();
10689
10690   // shl/shr
10691   SDValue N1 = N0->getOperand(0);
10692   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10693       N1.getOperand(1).getOpcode() != ISD::Constant ||
10694       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10695     return SDValue();
10696
10697   // load
10698   if (!isa<LoadSDNode>(N1.getOperand(0)))
10699     return SDValue();
10700   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10701   EVT MemVT = Load->getMemoryVT();
10702   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10703       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10704     return SDValue();
10705
10706
10707   // If the shift op is SHL, the logic op must be AND, otherwise the result
10708   // will be wrong.
10709   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10710     return SDValue();
10711
10712   if (!N0.hasOneUse() || !N1.hasOneUse())
10713     return SDValue();
10714
10715   SmallVector<SDNode*, 4> SetCCs;
10716   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10717                                ISD::ZERO_EXTEND, SetCCs, TLI))
10718     return SDValue();
10719
10720   // Actually do the transformation.
10721   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10722                                    Load->getChain(), Load->getBasePtr(),
10723                                    Load->getMemoryVT(), Load->getMemOperand());
10724
10725   SDLoc DL1(N1);
10726   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10727                               N1.getOperand(1));
10728
10729   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10730   SDLoc DL0(N0);
10731   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10732                             DAG.getConstant(Mask, DL0, VT));
10733
10734   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10735   CombineTo(N, And);
10736   if (SDValue(Load, 0).hasOneUse()) {
10737     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10738   } else {
10739     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10740                                 Load->getValueType(0), ExtLoad);
10741     CombineTo(Load, Trunc, ExtLoad.getValue(1));
10742   }
10743
10744   // N0 is dead at this point.
10745   recursivelyDeleteUnusedNodes(N0.getNode());
10746
10747   return SDValue(N,0); // Return N so it doesn't get rechecked!
10748 }
10749
10750 /// If we're narrowing or widening the result of a vector select and the final
10751 /// size is the same size as a setcc (compare) feeding the select, then try to
10752 /// apply the cast operation to the select's operands because matching vector
10753 /// sizes for a select condition and other operands should be more efficient.
10754 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10755   unsigned CastOpcode = Cast->getOpcode();
10756   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10757           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10758           CastOpcode == ISD::FP_ROUND) &&
10759          "Unexpected opcode for vector select narrowing/widening");
10760
10761   // We only do this transform before legal ops because the pattern may be
10762   // obfuscated by target-specific operations after legalization. Do not create
10763   // an illegal select op, however, because that may be difficult to lower.
10764   EVT VT = Cast->getValueType(0);
10765   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10766     return SDValue();
10767
10768   SDValue VSel = Cast->getOperand(0);
10769   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10770       VSel.getOperand(0).getOpcode() != ISD::SETCC)
10771     return SDValue();
10772
10773   // Does the setcc have the same vector size as the casted select?
10774   SDValue SetCC = VSel.getOperand(0);
10775   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10776   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10777     return SDValue();
10778
10779   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10780   SDValue A = VSel.getOperand(1);
10781   SDValue B = VSel.getOperand(2);
10782   SDValue CastA, CastB;
10783   SDLoc DL(Cast);
10784   if (CastOpcode == ISD::FP_ROUND) {
10785     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10786     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10787     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10788   } else {
10789     CastA = DAG.getNode(CastOpcode, DL, VT, A);
10790     CastB = DAG.getNode(CastOpcode, DL, VT, B);
10791   }
10792   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10793 }
10794
10795 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10796 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10797 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10798                                      const TargetLowering &TLI, EVT VT,
10799                                      bool LegalOperations, SDNode *N,
10800                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
10801   SDNode *N0Node = N0.getNode();
10802   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10803                                                    : ISD::isZEXTLoad(N0Node);
10804   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10805       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10806     return SDValue();
10807
10808   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10809   EVT MemVT = LN0->getMemoryVT();
10810   if ((LegalOperations || !LN0->isSimple() ||
10811        VT.isVector()) &&
10812       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10813     return SDValue();
10814
10815   SDValue ExtLoad =
10816       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10817                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10818   Combiner.CombineTo(N, ExtLoad);
10819   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10820   if (LN0->use_empty())
10821     Combiner.recursivelyDeleteUnusedNodes(LN0);
10822   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10823 }
10824
10825 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10826 // Only generate vector extloads when 1) they're legal, and 2) they are
10827 // deemed desirable by the target.
10828 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10829                                   const TargetLowering &TLI, EVT VT,
10830                                   bool LegalOperations, SDNode *N, SDValue N0,
10831                                   ISD::LoadExtType ExtLoadType,
10832                                   ISD::NodeType ExtOpc) {
10833   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10834       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10835       ((LegalOperations || VT.isVector() ||
10836         !cast<LoadSDNode>(N0)->isSimple()) &&
10837        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10838     return {};
10839
10840   bool DoXform = true;
10841   SmallVector<SDNode *, 4> SetCCs;
10842   if (!N0.hasOneUse())
10843     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10844   if (VT.isVector())
10845     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10846   if (!DoXform)
10847     return {};
10848
10849   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10850   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10851                                    LN0->getBasePtr(), N0.getValueType(),
10852                                    LN0->getMemOperand());
10853   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10854   // If the load value is used only by N, replace it via CombineTo N.
10855   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10856   Combiner.CombineTo(N, ExtLoad);
10857   if (NoReplaceTrunc) {
10858     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10859     Combiner.recursivelyDeleteUnusedNodes(LN0);
10860   } else {
10861     SDValue Trunc =
10862         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10863     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10864   }
10865   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10866 }
10867
10868 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10869                                         const TargetLowering &TLI, EVT VT,
10870                                         SDNode *N, SDValue N0,
10871                                         ISD::LoadExtType ExtLoadType,
10872                                         ISD::NodeType ExtOpc) {
10873   if (!N0.hasOneUse())
10874     return SDValue();
10875
10876   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10877   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10878     return SDValue();
10879
10880   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10881     return SDValue();
10882
10883   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10884     return SDValue();
10885
10886   SDLoc dl(Ld);
10887   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10888   SDValue NewLoad = DAG.getMaskedLoad(
10889       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10890       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10891       ExtLoadType, Ld->isExpandingLoad());
10892   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10893   return NewLoad;
10894 }
10895
10896 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10897                                        bool LegalOperations) {
10898   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10899           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10900
10901   SDValue SetCC = N->getOperand(0);
10902   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10903       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10904     return SDValue();
10905
10906   SDValue X = SetCC.getOperand(0);
10907   SDValue Ones = SetCC.getOperand(1);
10908   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10909   EVT VT = N->getValueType(0);
10910   EVT XVT = X.getValueType();
10911   // setge X, C is canonicalized to setgt, so we do not need to match that
10912   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10913   // not require the 'not' op.
10914   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10915     // Invert and smear/shift the sign bit:
10916     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10917     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10918     SDLoc DL(N);
10919     unsigned ShCt = VT.getSizeInBits() - 1;
10920     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10921     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10922       SDValue NotX = DAG.getNOT(DL, X, VT);
10923       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10924       auto ShiftOpcode =
10925         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10926       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10927     }
10928   }
10929   return SDValue();
10930 }
10931
10932 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
10933   SDValue N0 = N->getOperand(0);
10934   if (N0.getOpcode() != ISD::SETCC)
10935     return SDValue();
10936
10937   SDValue N00 = N0.getOperand(0);
10938   SDValue N01 = N0.getOperand(1);
10939   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10940   EVT VT = N->getValueType(0);
10941   EVT N00VT = N00.getValueType();
10942   SDLoc DL(N);
10943
10944   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10945   // the same size as the compared operands. Try to optimize sext(setcc())
10946   // if this is the case.
10947   if (VT.isVector() && !LegalOperations &&
10948       TLI.getBooleanContents(N00VT) ==
10949           TargetLowering::ZeroOrNegativeOneBooleanContent) {
10950     EVT SVT = getSetCCResultType(N00VT);
10951
10952     // If we already have the desired type, don't change it.
10953     if (SVT != N0.getValueType()) {
10954       // We know that the # elements of the results is the same as the
10955       // # elements of the compare (and the # elements of the compare result
10956       // for that matter).  Check to see that they are the same size.  If so,
10957       // we know that the element size of the sext'd result matches the
10958       // element size of the compare operands.
10959       if (VT.getSizeInBits() == SVT.getSizeInBits())
10960         return DAG.getSetCC(DL, VT, N00, N01, CC);
10961
10962       // If the desired elements are smaller or larger than the source
10963       // elements, we can use a matching integer vector type and then
10964       // truncate/sign extend.
10965       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10966       if (SVT == MatchingVecType) {
10967         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10968         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10969       }
10970     }
10971
10972     // Try to eliminate the sext of a setcc by zexting the compare operands.
10973     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
10974         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
10975       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
10976       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10977       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10978
10979       // We have an unsupported narrow vector compare op that would be legal
10980       // if extended to the destination type. See if the compare operands
10981       // can be freely extended to the destination type.
10982       auto IsFreeToExtend = [&](SDValue V) {
10983         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
10984           return true;
10985         // Match a simple, non-extended load that can be converted to a
10986         // legal {z/s}ext-load.
10987         // TODO: Allow widening of an existing {z/s}ext-load?
10988         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
10989               ISD::isUNINDEXEDLoad(V.getNode()) &&
10990               cast<LoadSDNode>(V)->isSimple() &&
10991               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
10992           return false;
10993
10994         // Non-chain users of this value must either be the setcc in this
10995         // sequence or extends that can be folded into the new {z/s}ext-load.
10996         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
10997              UI != UE; ++UI) {
10998           // Skip uses of the chain and the setcc.
10999           SDNode *User = *UI;
11000           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11001             continue;
11002           // Extra users must have exactly the same cast we are about to create.
11003           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11004           //       is enhanced similarly.
11005           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11006             return false;
11007         }
11008         return true;
11009       };
11010
11011       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11012         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11013         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11014         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11015       }
11016     }
11017   }
11018
11019   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11020   // Here, T can be 1 or -1, depending on the type of the setcc and
11021   // getBooleanContents().
11022   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11023
11024   // To determine the "true" side of the select, we need to know the high bit
11025   // of the value returned by the setcc if it evaluates to true.
11026   // If the type of the setcc is i1, then the true case of the select is just
11027   // sext(i1 1), that is, -1.
11028   // If the type of the setcc is larger (say, i8) then the value of the high
11029   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11030   // of the appropriate width.
11031   SDValue ExtTrueVal = (SetCCWidth == 1)
11032                            ? DAG.getAllOnesConstant(DL, VT)
11033                            : DAG.getBoolConstant(true, DL, VT, N00VT);
11034   SDValue Zero = DAG.getConstant(0, DL, VT);
11035   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11036     return SCC;
11037
11038   if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11039     EVT SetCCVT = getSetCCResultType(N00VT);
11040     // Don't do this transform for i1 because there's a select transform
11041     // that would reverse it.
11042     // TODO: We should not do this transform at all without a target hook
11043     // because a sext is likely cheaper than a select?
11044     if (SetCCVT.getScalarSizeInBits() != 1 &&
11045         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11046       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11047       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11048     }
11049   }
11050
11051   return SDValue();
11052 }
11053
11054 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
        // Combine a SIGN_EXTEND node. The folds below are tried in order and
        // the first one that applies returns. The result is a replacement
        // value, SDValue(N, 0) when the DAG was already rewritten in place
        // through CombineTo, or an empty SDValue when nothing was folded.
11055   SDValue N0 = N->getOperand(0);
11056   EVT VT = N->getValueType(0);
11057   SDLoc DL(N);
11058
11059   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11060     return Res;
11061
11062   // fold (sext (sext x)) -> (sext x)
11063   // fold (sext (aext x)) -> (sext x)
11064   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11065     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11066
11067   if (N0.getOpcode() == ISD::TRUNCATE) {
11068     // fold (sext (truncate (load x))) -> (sext (smaller load x))
11069     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11070     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11071       SDNode *oye = N0.getOperand(0).getNode();
11072       if (NarrowLoad.getNode() != N0.getNode()) {
11073         CombineTo(N0.getNode(), NarrowLoad);
11074         // CombineTo deleted the truncate, if needed, but not what's under it.
11075         AddToWorklist(oye);
11076       }
11077       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11078     }
11079
11080     // See if the value being truncated is already sign extended.  If so, just
11081     // eliminate the trunc/sext pair.
11082     SDValue Op = N0.getOperand(0);
          // Scalar bit widths of the pre-truncate value (Op), the truncated value
          // (Mid) and the sign-extended result (Dest).
11083     unsigned OpBits   = Op.getScalarValueSizeInBits();
11084     unsigned MidBits  = N0.getScalarValueSizeInBits();
11085     unsigned DestBits = VT.getScalarSizeInBits();
11086     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11087
11088     if (OpBits == DestBits) {
11089       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
11090       // bits, it is already ready.
11091       if (NumSignBits > DestBits-MidBits)
11092         return Op;
11093     } else if (OpBits < DestBits) {
11094       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
11095       // bits, just sext from i32.
11096       if (NumSignBits > OpBits-MidBits)
11097         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11098     } else {
11099       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
11100       // bits, just truncate to i32.
11101       if (NumSignBits > OpBits-MidBits)
11102         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11103     }
11104
11105     // fold (sext (truncate x)) -> (sextinreg x).
          // Op is first brought to the destination type (any-extend or truncate)
          // and then sign-extended in-register from the truncate's type.
11106     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11107                                                  N0.getValueType())) {
11108       if (OpBits < DestBits)
11109         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11110       else if (OpBits > DestBits)
11111         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11112       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11113                          DAG.getValueType(N0.getValueType()));
11114     }
11115   }
11116
11117   // Try to simplify (sext (load x)).
11118   if (SDValue foldedExt =
11119           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11120                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11121     return foldedExt;
11122
11123   if (SDValue foldedExt =
11124       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11125                                ISD::SIGN_EXTEND))
11126     return foldedExt;
11127
11128   // fold (sext (load x)) to multiple smaller sextloads.
11129   // Only on illegal but splittable vectors.
11130   if (SDValue ExtLoad = CombineExtLoad(N))
11131     return ExtLoad;
11132
11133   // Try to simplify (sext (sextload x)).
11134   if (SDValue foldedExt = tryToFoldExtOfExtload(
11135           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11136     return foldedExt;
11137
11138   // fold (sext (and/or/xor (load x), cst)) ->
11139   //      (and/or/xor (sextload x), (sext cst))
        // Only attempted before operation legalization, and only when the logic
        // op itself is legal in the destination type.
11140   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11141        N0.getOpcode() == ISD::XOR) &&
11142       isa<LoadSDNode>(N0.getOperand(0)) &&
11143       N0.getOperand(1).getOpcode() == ISD::Constant &&
11144       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11145     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11146     EVT MemVT = LN00->getMemoryVT();
          // The load must be unindexed and must not already be a zero-extending
          // load, and a sign-extending load to VT must be legal.
11147     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11148       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11149       SmallVector<SDNode*, 4> SetCCs;
11150       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11151                                              ISD::SIGN_EXTEND, SetCCs, TLI);
11152       if (DoXform) {
11153         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11154                                          LN00->getChain(), LN00->getBasePtr(),
11155                                          LN00->getMemoryVT(),
11156                                          LN00->getMemOperand());
11157         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11158         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11159                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11160         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
              // Record the use counts before the CombineTo calls below modify the
              // DAG. Despite its name, NoReplaceTruncAnd is true when N0 does not
              // have exactly one use; N0 is then replaced by a truncate of the
              // new wide logic op. NoReplaceTrunc is true when the load's value
              // has a single use, in which case only its chain result is
              // redirected instead of replacing the load with a truncate.
11161         bool NoReplaceTruncAnd = !N0.hasOneUse();
11162         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11163         CombineTo(N, And);
11164         // If N0 has multiple uses, change other uses as well.
11165         if (NoReplaceTruncAnd) {
11166           SDValue TruncAnd =
11167               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11168           CombineTo(N0.getNode(), TruncAnd);
11169         }
11170         if (NoReplaceTrunc) {
11171           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11172         } else {
11173           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11174                                       LN00->getValueType(0), ExtLoad);
11175           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11176         }
11177         return SDValue(N,0); // Return N so it doesn't get rechecked!
11178       }
11179     }
11180   }
11181
11182   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11183     return V;
11184
11185   if (SDValue V = foldSextSetcc(N))
11186     return V;
11187
11188   // fold (sext x) -> (zext x) if the sign bit is known zero.
11189   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11190       DAG.SignBitIsZero(N0))
11191     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11192
11193   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11194     return NewVSel;
11195
11196   // Eliminate this sign extend by doing a negation in the destination type:
11197   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11198   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11199       isNullOrNullSplat(N0.getOperand(0)) &&
11200       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11201       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11202     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11203     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11204   }
11205   // Eliminate this sign extend by doing a decrement in the destination type:
11206   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11207   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11208       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11209       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11210       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11211     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11212     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11213   }
11214
11215   // fold sext (not i1 X) -> add (zext i1 X), -1
11216   // TODO: This could be extended to handle bool vectors.
11217   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11218       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11219                             TLI.isOperationLegal(ISD::ADD, VT)))) {
11220     // If we can eliminate the 'not', the sext form should be better
11221     if (SDValue NewXor = visitXOR(N0.getNode())) {
11222       // Returning N0 is a form of in-visit replacement that may have
11223       // invalidated N0.
11224       if (NewXor.getNode() == N0.getNode()) {
11225         // Return SDValue here as the xor should have already been replaced in
11226         // this sext.
11227         return SDValue();
11228       } else {
11229         // Return a new sext with the new xor.
11230         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11231       }
11232     }
11233
          // The 'not' could not be simplified away: emit (zext X) + (-1).
11234     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11235     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11236   }
11237
11238   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11239     return Res;
11240
11241   return SDValue();
11242 }
11243
11244 // isTruncateOf - If N is a truncate of some other value, return true, record
11245 // the value being truncated in Op and which of Op's bits are zero/one in Known.
11246 // This function computes KnownBits to avoid a duplicated call to
11247 // computeKnownBits in the caller.
11248 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11249                          KnownBits &Known) {
11250   if (N->getOpcode() == ISD::TRUNCATE) {
11251     Op = N->getOperand(0);
11252     Known = DAG.computeKnownBits(Op);
11253     return true;
11254   }
11255
11256   if (N.getOpcode() != ISD::SETCC ||
11257       N.getValueType().getScalarType() != MVT::i1 ||
11258       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11259     return false;
11260
11261   SDValue Op0 = N->getOperand(0);
11262   SDValue Op1 = N->getOperand(1);
11263   assert(Op0.getValueType() == Op1.getValueType());
11264
11265   if (isNullOrNullSplat(Op0))
11266     Op = Op1;
11267   else if (isNullOrNullSplat(Op1))
11268     Op = Op0;
11269   else
11270     return false;
11271
11272   Known = DAG.computeKnownBits(Op);
11273
11274   return (Known.Zero | 1).isAllOnesValue();
11275 }
11276
11277 /// Given an extending node with a pop-count operand, if the target does not
11278 /// support a pop-count in the narrow source type but does support it in the
11279 /// destination type, widen the pop-count to the destination type.
11280 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11281   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11282           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11283
11284   SDValue CtPop = Extend->getOperand(0);
11285   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11286     return SDValue();
11287
11288   EVT VT = Extend->getValueType(0);
11289   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11290   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11291       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11292     return SDValue();
11293
11294   // zext (ctpop X) --> ctpop (zext X)
11295   SDLoc DL(Extend);
11296   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11297   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11298 }
11299
11300 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
        // Combine a ZERO_EXTEND node. The folds below are tried in order and
        // the first one that applies returns. The result is a replacement
        // value, SDValue(N, 0) when the DAG was already rewritten in place
        // through CombineTo, or an empty SDValue when nothing was folded. Note
        // that a few folds bail out with an empty SDValue without trying the
        // remaining ones.
11301   SDValue N0 = N->getOperand(0);
11302   EVT VT = N->getValueType(0);
11303
11304   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11305     return Res;
11306
11307   // fold (zext (zext x)) -> (zext x)
11308   // fold (zext (aext x)) -> (zext x)
11309   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11310     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11311                        N0.getOperand(0));
11312
11313   // fold (zext (truncate x)) -> (zext x) or
11314   //      (zext (truncate x)) -> (truncate x)
11315   // This is valid when the truncated bits of x are already zero.
11316   SDValue Op;
11317   KnownBits Known;
11318   if (isTruncateOf(DAG, N0, Op, Known)) {
          // TruncatedBits selects the bit range of Op that starts at N0's scalar
          // width and is bounded by the smaller of Op's and VT's scalar widths.
          // It is empty when Op and N0 already have the same scalar width.
11319     APInt TruncatedBits =
11320       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
11321       APInt(Op.getScalarValueSizeInBits(), 0) :
11322       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
11323                         N0.getScalarValueSizeInBits(),
11324                         std::min(Op.getScalarValueSizeInBits(),
11325                                  VT.getScalarSizeInBits()));
11326     if (TruncatedBits.isSubsetOf(Known.Zero))
11327       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11328   }
11329
11330   // fold (zext (truncate x)) -> (and x, mask)
11331   if (N0.getOpcode() == ISD::TRUNCATE) {
11332     // fold (zext (truncate (load x))) -> (zext (smaller load x))
11333     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
11334     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11335       SDNode *oye = N0.getOperand(0).getNode();
11336       if (NarrowLoad.getNode() != N0.getNode()) {
11337         CombineTo(N0.getNode(), NarrowLoad);
11338         // CombineTo deleted the truncate, if needed, but not what's under it.
11339         AddToWorklist(oye);
11340       }
11341       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11342     }
11343
          // SrcVT is the type before the truncate, MinVT the truncated type.
11344     EVT SrcVT = N0.getOperand(0).getValueType();
11345     EVT MinVT = N0.getValueType();
11346
11347     // Try to mask before the extension to avoid having to generate a larger mask,
11348     // possibly over several sub-vectors.
11349     if (SrcVT.bitsLT(VT) && VT.isVector()) {
11350       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
11351                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
11352         SDValue Op = N0.getOperand(0);
11353         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11354         AddToWorklist(Op.getNode());
11355         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
11356         // Transfer the debug info; the new node is equivalent to N0.
11357         DAG.transferDbgValues(N0, ZExtOrTrunc);
11358         return ZExtOrTrunc;
11359       }
11360     }
11361
          // Otherwise bring x to the destination type first and mask afterwards.
11362     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
11363       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11364       AddToWorklist(Op.getNode());
11365       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
11366       // We may safely transfer the debug info describing the truncate node over
11367       // to the equivalent and operation.
11368       DAG.transferDbgValues(N0, And);
11369       return And;
11370     }
11371   }
11372
11373   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
11374   // if either of the casts is not free.
11375   if (N0.getOpcode() == ISD::AND &&
11376       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11377       N0.getOperand(1).getOpcode() == ISD::Constant &&
11378       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11379                            N0.getValueType()) ||
11380        !TLI.isZExtFree(N0.getValueType(), VT))) {
11381     SDValue X = N0.getOperand(0).getOperand(0);
11382     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
11383     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11384     SDLoc DL(N);
11385     return DAG.getNode(ISD::AND, DL, VT,
11386                        X, DAG.getConstant(Mask, DL, VT));
11387   }
11388
11389   // Try to simplify (zext (load x)).
11390   if (SDValue foldedExt =
11391           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11392                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11393     return foldedExt;
11394
11395   if (SDValue foldedExt =
11396       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
11397                                ISD::ZERO_EXTEND))
11398     return foldedExt;
11399
11400   // fold (zext (load x)) to multiple smaller zextloads.
11401   // Only on illegal but splittable vectors.
11402   if (SDValue ExtLoad = CombineExtLoad(N))
11403     return ExtLoad;
11404
11405   // fold (zext (and/or/xor (load x), cst)) ->
11406   //      (and/or/xor (zextload x), (zext cst))
11407   // Unless (and (load x) cst) will match as a zextload already and has
11408   // additional users.
        // Only attempted before operation legalization, and only when the logic
        // op itself is legal in the destination type.
11409   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11410        N0.getOpcode() == ISD::XOR) &&
11411       isa<LoadSDNode>(N0.getOperand(0)) &&
11412       N0.getOperand(1).getOpcode() == ISD::Constant &&
11413       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11414     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11415     EVT MemVT = LN00->getMemoryVT();
          // The load must be unindexed and must not already be a sign-extending
          // load, and a zero-extending load to VT must be legal.
11416     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
11417         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
11418       bool DoXform = true;
11419       SmallVector<SDNode*, 4> SetCCs;
            // With several users of an 'and', skip the fold when
            // isAndLoadExtLoad reports a match for the constant and the load.
11420       if (!N0.hasOneUse()) {
11421         if (N0.getOpcode() == ISD::AND) {
11422           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
11423           EVT LoadResultTy = AndC->getValueType(0);
11424           EVT ExtVT;
11425           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
11426             DoXform = false;
11427         }
11428       }
11429       if (DoXform)
11430         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11431                                           ISD::ZERO_EXTEND, SetCCs, TLI);
11432       if (DoXform) {
11433         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
11434                                          LN00->getChain(), LN00->getBasePtr(),
11435                                          LN00->getMemoryVT(),
11436                                          LN00->getMemOperand());
11437         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11438         SDLoc DL(N);
11439         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11440                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11441         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
              // Record the use counts before the CombineTo calls below modify the
              // DAG. Despite its name, NoReplaceTruncAnd is true when N0 does not
              // have exactly one use; N0 is then replaced by a truncate of the
              // new wide logic op. NoReplaceTrunc is true when the load's value
              // has a single use, in which case only its chain result is
              // redirected instead of replacing the load with a truncate.
11442         bool NoReplaceTruncAnd = !N0.hasOneUse();
11443         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11444         CombineTo(N, And);
11445         // If N0 has multiple uses, change other uses as well.
11446         if (NoReplaceTruncAnd) {
11447           SDValue TruncAnd =
11448               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11449           CombineTo(N0.getNode(), TruncAnd);
11450         }
11451         if (NoReplaceTrunc) {
11452           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11453         } else {
11454           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11455                                       LN00->getValueType(0), ExtLoad);
11456           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11457         }
11458         return SDValue(N,0); // Return N so it doesn't get rechecked!
11459       }
11460     }
11461   }
11462
11463   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11464   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11465   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
11466     return ZExtLoad;
11467
11468   // Try to simplify (zext (zextload x)).
11469   if (SDValue foldedExt = tryToFoldExtOfExtload(
11470           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
11471     return foldedExt;
11472
11473   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11474     return V;
11475
11476   if (N0.getOpcode() == ISD::SETCC) {
11477     // Only do this before legalize for now.
11478     if (!LegalOperations && VT.isVector() &&
11479         N0.getValueType().getVectorElementType() == MVT::i1) {
11480       EVT N00VT = N0.getOperand(0).getValueType();
            // Give up on N entirely when the setcc already has the result type
            // that getSetCCResultType reports for its operand type.
11481       if (getSetCCResultType(N00VT) == N0.getValueType())
11482         return SDValue();
11483
11484       // We know that the # elements of the results is the same as the #
11485       // elements of the compare (and the # elements of the compare result for
11486       // that matter). Check to see that they are the same size. If so, we know
11487       // that the element size of the zext'd result matches the element size of
11488       // the compare operands.
11489       SDLoc DL(N);
11490       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
11491         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
11492         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
11493                                      N0.getOperand(1), N0.getOperand(2));
11494         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
11495       }
11496
11497       // If the desired elements are smaller or larger than the source
11498       // elements we can use a matching integer vector type and then
11499       // truncate/any extend followed by zext_in_reg.
11500       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11501       SDValue VsetCC =
11502           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11503                       N0.getOperand(1), N0.getOperand(2));
11504       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11505                                     N0.getValueType());
11506     }
11507
11508     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
11509     SDLoc DL(N);
11510     EVT N0VT = N0.getValueType();
11511     EVT N00VT = N0.getOperand(0).getValueType();
11512     if (SDValue SCC = SimplifySelectCC(
11513             DL, N0.getOperand(0), N0.getOperand(1),
11514             DAG.getBoolConstant(true, DL, N0VT, N00VT),
11515             DAG.getBoolConstant(false, DL, N0VT, N00VT),
11516             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11517       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11518   }
11519
11520   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
11521   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11522       isa<ConstantSDNode>(N0.getOperand(1)) &&
11523       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11524       N0.hasOneUse()) {
11525     SDValue ShAmt = N0.getOperand(1);
11526     if (N0.getOpcode() == ISD::SHL) {
11527       SDValue InnerZExt = N0.getOperand(0);
11528       // If the original shl may be shifting out bits, do not perform this
11529       // transformation.
            // KnownZeroBits is the number of bits the inner zext added. Note that
            // the early return below abandons N; later folds are not tried.
11530       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11531         InnerZExt.getOperand(0).getValueSizeInBits();
11532       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11533         return SDValue();
11534     }
11535
11536     SDLoc DL(N);
11537
11538     // Ensure that the shift amount is wide enough for the shifted value.
11539     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11540       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11541
11542     return DAG.getNode(N0.getOpcode(), DL, VT,
11543                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11544                        ShAmt);
11545   }
11546
11547   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11548     return NewVSel;
11549
11550   if (SDValue NewCtPop = widenCtPop(N, DAG))
11551     return NewCtPop;
11552
11553   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11554     return Res;
11555
11556   return SDValue();
11557 }
11558
11559 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
        // Combine an ANY_EXTEND node. The folds below are tried in order and
        // the first one that applies returns. The result is a replacement
        // value, SDValue(N, 0) when the DAG was already rewritten in place
        // through CombineTo, or an empty SDValue when nothing was folded.
11560   SDValue N0 = N->getOperand(0);
11561   EVT VT = N->getValueType(0);
11562
11563   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11564     return Res;
11565
11566   // fold (aext (aext x)) -> (aext x)
11567   // fold (aext (zext x)) -> (zext x)
11568   // fold (aext (sext x)) -> (sext x)
11569   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
11570       N0.getOpcode() == ISD::ZERO_EXTEND ||
11571       N0.getOpcode() == ISD::SIGN_EXTEND)
11572     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11573
11574   // fold (aext (truncate (load x))) -> (aext (smaller load x))
11575   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
11576   if (N0.getOpcode() == ISD::TRUNCATE) {
11577     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11578       SDNode *oye = N0.getOperand(0).getNode();
11579       if (NarrowLoad.getNode() != N0.getNode()) {
11580         CombineTo(N0.getNode(), NarrowLoad);
11581         // CombineTo deleted the truncate, if needed, but not what's under it.
11582         AddToWorklist(oye);
11583       }
11584       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11585     }
11586   }
11587
11588   // fold (aext (truncate x))
        // The pre-truncate value is any-extended or truncated straight to VT.
11589   if (N0.getOpcode() == ISD::TRUNCATE)
11590     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11591
11592   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11593   // if the trunc is not free.
11594   if (N0.getOpcode() == ISD::AND &&
11595       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11596       N0.getOperand(1).getOpcode() == ISD::Constant &&
11597       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11598                           N0.getValueType())) {
11599     SDLoc DL(N);
11600     SDValue X = N0.getOperand(0).getOperand(0);
11601     X = DAG.getAnyExtOrTrunc(X, DL, VT);
11602     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11603     return DAG.getNode(ISD::AND, DL, VT,
11604                        X, DAG.getConstant(Mask, DL, VT));
11605   }
11606
11607   // fold (aext (load x)) -> (aext (truncate (extload x)))
11608   // None of the supported targets knows how to perform load and any_ext
11609   // on vectors in one instruction, so attempt to fold to zext instead.
11610   if (VT.isVector()) {
11611     // Try to simplify (zext (load x)).
11612     if (SDValue foldedExt =
11613             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11614                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11615       return foldedExt;
11616   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11617              ISD::isUNINDEXEDLoad(N0.getNode()) &&
11618              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
          // Scalar case: a plain, unindexed load for which an EXTLOAD to VT is
          // legal. With a single use the transform is always taken; otherwise
          // the decision is delegated to ExtendUsesToFormExtLoad.
11619     bool DoXform = true;
11620     SmallVector<SDNode *, 4> SetCCs;
11621     if (!N0.hasOneUse())
11622       DoXform =
11623           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11624     if (DoXform) {
11625       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11626       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11627                                        LN0->getChain(), LN0->getBasePtr(),
11628                                        N0.getValueType(), LN0->getMemOperand());
11629       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11630       // If the load value is used only by N, replace it via CombineTo N.
            // The use count is sampled before CombineTo below modifies the DAG.
11631       bool NoReplaceTrunc = N0.hasOneUse();
11632       CombineTo(N, ExtLoad);
11633       if (NoReplaceTrunc) {
11634         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11635         recursivelyDeleteUnusedNodes(LN0);
11636       } else {
              // Other users of the load's value get a truncate of the wide load.
11637         SDValue Trunc =
11638             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11639         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11640       }
11641       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11642     }
11643   }
11644
11645   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11646   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11647   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
        // The existing extending load is re-emitted at type VT, keeping its
        // extension kind and memory type. Requires the load to have one use.
11648   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11649       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11650     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11651     ISD::LoadExtType ExtType = LN0->getExtensionType();
11652     EVT MemVT = LN0->getMemoryVT();
11653     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11654       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11655                                        VT, LN0->getChain(), LN0->getBasePtr(),
11656                                        MemVT, LN0->getMemOperand());
11657       CombineTo(N, ExtLoad);
11658       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11659       recursivelyDeleteUnusedNodes(LN0);
11660       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11661     }
11662   }
11663
11664   if (N0.getOpcode() == ISD::SETCC) {
11665     // For vectors:
11666     // aext(setcc) -> vsetcc
11667     // aext(setcc) -> truncate(vsetcc)
11668     // aext(setcc) -> aext(vsetcc)
11669     // Only do this before legalize for now.
11670     if (VT.isVector() && !LegalOperations) {
11671       EVT N00VT = N0.getOperand(0).getValueType();
            // Give up on N entirely when the setcc already has the result type
            // that getSetCCResultType reports for its operand type.
11672       if (getSetCCResultType(N00VT) == N0.getValueType())
11673         return SDValue();
11674
11675       // We know that the # elements of the results is the same as the
11676       // # elements of the compare (and the # elements of the compare result
11677       // for that matter).  Check to see that they are the same size.  If so,
11678       // we know that the element size of the extended result matches the
11679       // element size of the compare operands.
11680       if (VT.getSizeInBits() == N00VT.getSizeInBits())
11681         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11682                              N0.getOperand(1),
11683                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
11684
11685       // If the desired elements are smaller or larger than the source
11686       // elements we can use a matching integer vector type and then
11687       // truncate/any extend
11688       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11689       SDValue VsetCC =
11690         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11691                       N0.getOperand(1),
11692                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
11693       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11694     }
11695
11696     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11697     SDLoc DL(N);
11698     if (SDValue SCC = SimplifySelectCC(
11699             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11700             DAG.getConstant(0, DL, VT),
11701             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11702       return SCC;
11703   }
11704
11705   if (SDValue NewCtPop = widenCtPop(N, DAG))
11706     return NewCtPop;
11707
11708   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11709     return Res;
11710
11711   return SDValue();
11712 }
11713
11714 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11715   unsigned Opcode = N->getOpcode();
11716   SDValue N0 = N->getOperand(0);
11717   SDValue N1 = N->getOperand(1);
11718   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11719
11720   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11721   if (N0.getOpcode() == Opcode &&
11722       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11723     return N0;
11724
11725   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11726       N0.getOperand(0).getOpcode() == Opcode) {
11727     // We have an assert, truncate, assert sandwich. Make one stronger assert
11728     // by asserting on the smallest asserted type to the larger source type.
11729     // This eliminates the later assert:
11730     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11731     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11732     SDValue BigA = N0.getOperand(0);
11733     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11734     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11735            "Asserting zero/sign-extended bits to a type larger than the "
11736            "truncated destination does not provide information");
11737
11738     SDLoc DL(N);
11739     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11740     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11741     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11742                                     BigA.getOperand(0), MinAssertVTVal);
11743     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11744   }
11745
11746   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
11747   // than X. Just move the AssertZext in front of the truncate and drop the
11748   // AssertSExt.
11749   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11750       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11751       Opcode == ISD::AssertZext) {
11752     SDValue BigA = N0.getOperand(0);
11753     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11754     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11755            "Asserting zero/sign-extended bits to a type larger than the "
11756            "truncated destination does not provide information");
11757
11758     if (AssertVT.bitsLT(BigA_AssertVT)) {
11759       SDLoc DL(N);
11760       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11761                                       BigA.getOperand(0), N1);
11762       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11763     }
11764   }
11765
11766   return SDValue();
11767 }
11768
11769 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11770   SDLoc DL(N);
11771
11772   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11773   SDValue N0 = N->getOperand(0);
11774
11775   // Fold (assertalign (assertalign x, AL0), AL1) ->
11776   // (assertalign x, max(AL0, AL1))
11777   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11778     return DAG.getAssertAlign(DL, N0.getOperand(0),
11779                               std::max(AL, AAN->getAlign()));
11780
11781   // In rare cases, there are trivial arithmetic ops in source operands. Sink
11782   // this assert down to source operands so that those arithmetic ops could be
11783   // exposed to the DAG combining.
11784   switch (N0.getOpcode()) {
11785   default:
11786     break;
11787   case ISD::ADD:
11788   case ISD::SUB: {
11789     unsigned AlignShift = Log2(AL);
11790     SDValue LHS = N0.getOperand(0);
11791     SDValue RHS = N0.getOperand(1);
11792     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11793     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
11794     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11795       if (LHSAlignShift < AlignShift)
11796         LHS = DAG.getAssertAlign(DL, LHS, AL);
11797       if (RHSAlignShift < AlignShift)
11798         RHS = DAG.getAssertAlign(DL, RHS, AL);
11799       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11800     }
11801     break;
11802   }
11803   }
11804
11805   return SDValue();
11806 }
11807
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, and N is a multiple of the number of bits of
/// the narrower type, transform it to a narrower load from the original
/// address offset by N / 8 bytes (the offset is adjusted on big-endian
/// targets). Also narrow the load if the result is masked with an AND to
/// effectively produce a smaller type. If the result is to be extended, also
/// fold the extension to form an extending load.
///
/// \param N The node to narrow. SIGN_EXTEND_INREG, SRL and AND get dedicated
/// handling below; for any other opcode the narrow type is N's own result
/// type and a non-extending load is requested.
/// \returns The value built around the narrower load, or an empty SDValue when
/// the pattern does not match or the narrowing is rejected.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // Defaults: a plain (non-extending) load of N's result type.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // ShAmt is the right-shift (in bits) applied to the loaded value; it later
  // becomes the byte offset of the narrow load. HasShiftedOffset records that
  // the shift came from a shifted AND mask and must be undone with a SHL.
  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    // Start from the SRL node itself so that the SRL handling further down
    // inspects it.
    N0 = SDValue(N, 0);

    // Only (srl (load ...), constant) is handled.
    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    // The narrow type keeps the bits that survive the shift: counted from the
    // memory width for a non-sign-extending load whose memory type is wider
    // than the shift amount, and from the result width otherwise.
    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getScalarSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      // Low-bit mask: the narrow type is just the run of trailing ones.
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      // Shifted mask: the trailing zeros give the shift amount and the run of
      // ones above them gives the narrow width. The loaded value has to be
      // shifted back into position afterwards.
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    SDValue SRL = N0;
    if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
      ShAmt = ConstShift->getZExtValue();
      unsigned EVTBits = ExtVT.getScalarSizeInBits();
      // Is the shift amount a multiple of size of VT?
      // NOTE(review): the mask test is an exact multiple-of check only when
      // EVTBits is a power of two.
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      // (If the shift amount test above failed, N0 is still the SRL and this
      // bails out.)
      auto *LN0 = dyn_cast<LoadSDNode>(N0);
      if (!LN0) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (LN0->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
        return SDValue();

      // If the SRL is only used by a masking AND, we may be able to adjust
      // the ExtVT to make the AND redundant.
      // The SRL was checked above to have exactly one use, so its first use is
      // its only user.
      SDNode *Mask = *(SRL->use_begin());
      if (Mask->getOpcode() == ISD::AND &&
          isa<ConstantSDNode>(Mask->getOperand(1))) {
        const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
        if (ShiftMask.isMask()) {
          EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                           ShiftMask.countTrailingOnes());
          // If the mask is smaller, recompute the type.
          if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
              TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
            ExtVT = MaskedVT;
        }
      }
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  // ShLeftAmt remembers the swallowed shift so it can be re-applied to the
  // narrow load's result below.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  // Maps a bit shift to (load store size - narrow store size - shift).
  // Applying it twice yields the original shift again, which the
  // HasShiftedOffset path below relies on.
  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits =
        LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian())
    ShAmt = AdjustBigEndianShift(ShAmt);

  // Convert the bit shift into a byte offset from the original base pointer
  // and derive the alignment that still holds at that offset.
  uint64_t PtrOff = ShAmt / 8;
  Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
                                            TypeSize::Fixed(PtrOff), DL, Flags);
  AddToWorklist(NewPtr.getNode());

  // Build the narrow load, reusing the original chain, memory operand flags
  // and AA info.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    // Use the target's shift amount type unless it cannot represent the
    // amount, in which case fall back to VT.
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getScalarSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // Recalculate the shift amount after it has been altered to calculate
    // the offset.
    if (DAG.getDataLayout().isBigEndian())
      ShAmt = AdjustBigEndianShift(ShAmt);

    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into the lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in the
    // register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    // In this case the uses of N are rewritten here, in addition to returning
    // the result.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}
12015
12016 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12017   SDValue N0 = N->getOperand(0);
12018   SDValue N1 = N->getOperand(1);
12019   EVT VT = N->getValueType(0);
12020   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12021   unsigned VTBits = VT.getScalarSizeInBits();
12022   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12023
12024   // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
12025   if (N0.isUndef())
12026     return DAG.getConstant(0, SDLoc(N), VT);
12027
12028   // fold (sext_in_reg c1) -> c1
12029   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12030     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12031
12032   // If the input is already sign extended, just drop the extension.
12033   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
12034     return N0;
12035
12036   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12037   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12038       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12039     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12040                        N1);
12041
12042   // fold (sext_in_reg (sext x)) -> (sext x)
12043   // fold (sext_in_reg (aext x)) -> (sext x)
12044   // if x is small enough or if we know that x has more than 1 sign bit and the
12045   // sign_extend_inreg is extending from one of them.
12046   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12047     SDValue N00 = N0.getOperand(0);
12048     unsigned N00Bits = N00.getScalarValueSizeInBits();
12049     if ((N00Bits <= ExtVTBits ||
12050          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
12051         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12052       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12053   }
12054
12055   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12056   // if x is small enough or if we know that x has more than 1 sign bit and the
12057   // sign_extend_inreg is extending from one of them.
12058   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12059       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12060       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12061     SDValue N00 = N0.getOperand(0);
12062     unsigned N00Bits = N00.getScalarValueSizeInBits();
12063     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12064     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12065     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12066     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12067     if ((N00Bits == ExtVTBits ||
12068          (!IsZext && (N00Bits < ExtVTBits ||
12069                       (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
12070                           ExtVTBits))) &&
12071         (!LegalOperations ||
12072          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12073       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12074   }
12075
12076   // fold (sext_in_reg (zext x)) -> (sext x)
12077   // iff we are extending the source sign bit.
12078   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12079     SDValue N00 = N0.getOperand(0);
12080     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12081         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12082       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
12083   }
12084
12085   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12086   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12087     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12088
12089   // fold operands of sext_in_reg based on knowledge that the top bits are not
12090   // demanded.
12091   if (SimplifyDemandedBits(SDValue(N, 0)))
12092     return SDValue(N, 0);
12093
12094   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12095   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12096   if (SDValue NarrowLoad = ReduceLoadWidth(N))
12097     return NarrowLoad;
12098
12099   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12100   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12101   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12102   if (N0.getOpcode() == ISD::SRL) {
12103     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12104       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12105         // We can turn this into an SRA iff the input to the SRL is already sign
12106         // extended enough.
12107         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12108         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12109           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12110                              N0.getOperand(1));
12111       }
12112   }
12113
12114   // fold (sext_inreg (extload x)) -> (sextload x)
12115   // If sextload is not supported by target, we can only do the combine when
12116   // load has one use. Doing otherwise can block folding the extload with other
12117   // extends that the target does support.
12118   if (ISD::isEXTLoad(N0.getNode()) &&
12119       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12120       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12121       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12122         N0.hasOneUse()) ||
12123        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12124     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12125     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12126                                      LN0->getChain(),
12127                                      LN0->getBasePtr(), ExtVT,
12128                                      LN0->getMemOperand());
12129     CombineTo(N, ExtLoad);
12130     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12131     AddToWorklist(ExtLoad.getNode());
12132     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12133   }
12134
12135   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12136   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12137       N0.hasOneUse() &&
12138       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12139       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12140        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12141     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12142     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12143                                      LN0->getChain(),
12144                                      LN0->getBasePtr(), ExtVT,
12145                                      LN0->getMemOperand());
12146     CombineTo(N, ExtLoad);
12147     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12148     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12149   }
12150
12151   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12152   // ignore it if the masked load is already sign extended
12153   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12154     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12155         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12156         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12157       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12158           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12159           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12160           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12161       CombineTo(N, ExtMaskedLoad);
12162       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12163       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12164     }
12165   }
12166
12167   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12168   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12169     if (SDValue(GN0, 0).hasOneUse() &&
12170         ExtVT == GN0->getMemoryVT() &&
12171         TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
12172       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
12173                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
12174
12175       SDValue ExtLoad = DAG.getMaskedGather(
12176           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12177           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12178
12179       CombineTo(N, ExtLoad);
12180       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12181       AddToWorklist(ExtLoad.getNode());
12182       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12183     }
12184   }
12185
12186   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12187   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12188     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12189                                            N0.getOperand(1), false))
12190       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12191   }
12192
12193   return SDValue();
12194 }
12195
12196 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12197   SDValue N0 = N->getOperand(0);
12198   EVT VT = N->getValueType(0);
12199
12200   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12201   if (N0.isUndef())
12202     return DAG.getConstant(0, SDLoc(N), VT);
12203
12204   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12205     return Res;
12206
12207   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12208     return SDValue(N, 0);
12209
12210   return SDValue();
12211 }
12212
12213 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12214   SDValue N0 = N->getOperand(0);
12215   EVT VT = N->getValueType(0);
12216   EVT SrcVT = N0.getValueType();
12217   bool isLE = DAG.getDataLayout().isLittleEndian();
12218
12219   // noop truncate
12220   if (SrcVT == VT)
12221     return N0;
12222
12223   // fold (truncate (truncate x)) -> (truncate x)
12224   if (N0.getOpcode() == ISD::TRUNCATE)
12225     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12226
12227   // fold (truncate c1) -> c1
12228   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12229     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12230     if (C.getNode() != N)
12231       return C;
12232   }
12233
12234   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12235   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12236       N0.getOpcode() == ISD::SIGN_EXTEND ||
12237       N0.getOpcode() == ISD::ANY_EXTEND) {
12238     // if the source is smaller than the dest, we still need an extend.
12239     if (N0.getOperand(0).getValueType().bitsLT(VT))
12240       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12241     // if the source is larger than the dest, than we just need the truncate.
12242     if (N0.getOperand(0).getValueType().bitsGT(VT))
12243       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12244     // if the source and dest are the same type, we can drop both the extend
12245     // and the truncate.
12246     return N0.getOperand(0);
12247   }
12248
12249   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12250   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12251     return SDValue();
12252
12253   // Fold extract-and-trunc into a narrow extract. For example:
12254   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12255   //   i32 y = TRUNCATE(i64 x)
12256   //        -- becomes --
12257   //   v16i8 b = BITCAST (v2i64 val)
12258   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12259   //
12260   // Note: We only run this optimization after type legalization (which often
12261   // creates this pattern) and before operation legalization after which
12262   // we need to be more careful about the vector instructions that we generate.
12263   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12264       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12265     EVT VecTy = N0.getOperand(0).getValueType();
12266     EVT ExTy = N0.getValueType();
12267     EVT TrTy = N->getValueType(0);
12268
12269     auto EltCnt = VecTy.getVectorElementCount();
12270     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12271     auto NewEltCnt = EltCnt * SizeRatio;
12272
12273     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12274     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12275
12276     SDValue EltNo = N0->getOperand(1);
12277     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12278       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12279       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12280
12281       SDLoc DL(N);
12282       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12283                          DAG.getBitcast(NVT, N0.getOperand(0)),
12284                          DAG.getVectorIdxConstant(Index, DL));
12285     }
12286   }
12287
12288   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12289   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12290     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12291         TLI.isTruncateFree(SrcVT, VT)) {
12292       SDLoc SL(N0);
12293       SDValue Cond = N0.getOperand(0);
12294       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12295       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12296       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12297     }
12298   }
12299
12300   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12301   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12302       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12303       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12304     SDValue Amt = N0.getOperand(1);
12305     KnownBits Known = DAG.computeKnownBits(Amt);
12306     unsigned Size = VT.getScalarSizeInBits();
12307     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12308       SDLoc SL(N);
12309       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12310
12311       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12312       if (AmtVT != Amt.getValueType()) {
12313         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12314         AddToWorklist(Amt.getNode());
12315       }
12316       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12317     }
12318   }
12319
12320   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12321     return V;
12322
12323   // Attempt to pre-truncate BUILD_VECTOR sources.
12324   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12325       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12326       // Avoid creating illegal types if running after type legalizer.
12327       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12328     SDLoc DL(N);
12329     EVT SVT = VT.getScalarType();
12330     SmallVector<SDValue, 8> TruncOps;
12331     for (const SDValue &Op : N0->op_values()) {
12332       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12333       TruncOps.push_back(TruncOp);
12334     }
12335     return DAG.getBuildVector(VT, DL, TruncOps);
12336   }
12337
12338   // Fold a series of buildvector, bitcast, and truncate if possible.
12339   // For example fold
12340   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12341   //   (2xi32 (buildvector x, y)).
12342   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12343       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12344       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12345       N0.getOperand(0).hasOneUse()) {
12346     SDValue BuildVect = N0.getOperand(0);
12347     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12348     EVT TruncVecEltTy = VT.getVectorElementType();
12349
12350     // Check that the element types match.
12351     if (BuildVectEltTy == TruncVecEltTy) {
12352       // Now we only need to compute the offset of the truncated elements.
12353       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
12354       unsigned TruncVecNumElts = VT.getVectorNumElements();
12355       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12356
12357       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12358              "Invalid number of elements");
12359
12360       SmallVector<SDValue, 8> Opnds;
12361       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12362         Opnds.push_back(BuildVect.getOperand(i));
12363
12364       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12365     }
12366   }
12367
12368   // See if we can simplify the input to this truncate through knowledge that
12369   // only the low bits are being used.
12370   // For example "trunc (or (shl x, 8), y)" // -> trunc y
12371   // Currently we only perform this optimization on scalars because vectors
12372   // may have different active low bits.
12373   if (!VT.isVector()) {
12374     APInt Mask =
12375         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12376     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12377       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12378   }
12379
12380   // fold (truncate (load x)) -> (smaller load x)
12381   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12382   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12383     if (SDValue Reduced = ReduceLoadWidth(N))
12384       return Reduced;
12385
12386     // Handle the case where the load remains an extending load even
12387     // after truncation.
12388     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12389       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12390       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12391         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12392                                          VT, LN0->getChain(), LN0->getBasePtr(),
12393                                          LN0->getMemoryVT(),
12394                                          LN0->getMemOperand());
12395         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12396         return NewLoad;
12397       }
12398     }
12399   }
12400
12401   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12402   // where ... are all 'undef'.
12403   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12404     SmallVector<EVT, 8> VTs;
12405     SDValue V;
12406     unsigned Idx = 0;
12407     unsigned NumDefs = 0;
12408
12409     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12410       SDValue X = N0.getOperand(i);
12411       if (!X.isUndef()) {
12412         V = X;
12413         Idx = i;
12414         NumDefs++;
12415       }
12416       // Stop if more than one members are non-undef.
12417       if (NumDefs > 1)
12418         break;
12419
12420       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12421                                      VT.getVectorElementType(),
12422                                      X.getValueType().getVectorElementCount()));
12423     }
12424
12425     if (NumDefs == 0)
12426       return DAG.getUNDEF(VT);
12427
12428     if (NumDefs == 1) {
12429       assert(V.getNode() && "The single defined operand is empty!");
12430       SmallVector<SDValue, 8> Opnds;
12431       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12432         if (i != Idx) {
12433           Opnds.push_back(DAG.getUNDEF(VTs[i]));
12434           continue;
12435         }
12436         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12437         AddToWorklist(NV.getNode());
12438         Opnds.push_back(NV);
12439       }
12440       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12441     }
12442   }
12443
12444   // Fold truncate of a bitcast of a vector to an extract of the low vector
12445   // element.
12446   //
12447   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12448   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12449     SDValue VecSrc = N0.getOperand(0);
12450     EVT VecSrcVT = VecSrc.getValueType();
12451     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12452         (!LegalOperations ||
12453          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12454       SDLoc SL(N);
12455
12456       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12457       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12458                          DAG.getVectorIdxConstant(Idx, SL));
12459     }
12460   }
12461
12462   // Simplify the operands using demanded-bits information.
12463   if (SimplifyDemandedBits(SDValue(N, 0)))
12464     return SDValue(N, 0);
12465
12466   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12467   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12468   // When the adde's carry is not used.
12469   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12470       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
12471       // We only do for addcarry before legalize operation
12472       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12473        TLI.isOperationLegal(N0.getOpcode(), VT))) {
12474     SDLoc SL(N);
12475     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12476     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12477     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12478     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12479   }
12480
12481   // fold (truncate (extract_subvector(ext x))) ->
12482   //      (extract_subvector x)
12483   // TODO: This can be generalized to cover cases where the truncate and extract
12484   // do not fully cancel each other out.
12485   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12486     SDValue N00 = N0.getOperand(0);
12487     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12488         N00.getOpcode() == ISD::ZERO_EXTEND ||
12489         N00.getOpcode() == ISD::ANY_EXTEND) {
12490       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12491           VT.getVectorElementType())
12492         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12493                            N00.getOperand(0), N0.getOperand(1));
12494     }
12495   }
12496
12497   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12498     return NewVSel;
12499
12500   // Narrow a suitable binary operation with a non-opaque constant operand by
12501   // moving it ahead of the truncate. This is limited to pre-legalization
12502   // because targets may prefer a wider type during later combines and invert
12503   // this transform.
12504   switch (N0.getOpcode()) {
12505   case ISD::ADD:
12506   case ISD::SUB:
12507   case ISD::MUL:
12508   case ISD::AND:
12509   case ISD::OR:
12510   case ISD::XOR:
12511     if (!LegalOperations && N0.hasOneUse() &&
12512         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12513          isConstantOrConstantVector(N0.getOperand(1), true))) {
12514       // TODO: We already restricted this to pre-legalization, but for vectors
12515       // we are extra cautious to not create an unsupported operation.
12516       // Target-specific changes are likely needed to avoid regressions here.
12517       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12518         SDLoc DL(N);
12519         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12520         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12521         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12522       }
12523     }
12524     break;
12525   case ISD::USUBSAT:
12526     // Truncate the USUBSAT only if LHS is a known zero-extension, its not
12527     // enough to know that the upper bits are zero we must ensure that we don't
12528     // introduce an extra truncate.
12529     if (!LegalOperations && N0.hasOneUse() &&
12530         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12531         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12532             VT.getScalarSizeInBits() &&
12533         hasOperation(N0.getOpcode(), VT)) {
12534       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12535                                  DAG, SDLoc(N));
12536     }
12537     break;
12538   }
12539
12540   return SDValue();
12541 }
12542
12543 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12544   SDValue Elt = N->getOperand(i);
12545   if (Elt.getOpcode() != ISD::MERGE_VALUES)
12546     return Elt.getNode();
12547   return Elt.getOperand(Elt.getResNo()).getNode();
12548 }
12549
12550 /// build_pair (load, load) -> load
12551 /// if load locations are consecutive.
12552 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12553   assert(N->getOpcode() == ISD::BUILD_PAIR);
12554
12555   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12556   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12557
12558   // A BUILD_PAIR is always having the least significant part in elt 0 and the
12559   // most significant part in elt 1. So when combining into one large load, we
12560   // need to consider the endianness.
12561   if (DAG.getDataLayout().isBigEndian())
12562     std::swap(LD1, LD2);
12563
12564   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
12565       !LD1->hasOneUse() || !LD2->hasOneUse() ||
12566       LD1->getAddressSpace() != LD2->getAddressSpace())
12567     return SDValue();
12568
12569   EVT LD1VT = LD1->getValueType(0);
12570   unsigned LD1Bytes = LD1VT.getStoreSize();
12571   if (DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12572     Align Alignment = LD1->getAlign();
12573     Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12574         VT.getTypeForEVT(*DAG.getContext()));
12575
12576     if (NewAlign <= Alignment &&
12577         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12578       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12579                          LD1->getPointerInfo(), Alignment);
12580   }
12581
12582   return SDValue();
12583 }
12584
12585 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12586   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12587   // and Lo parts; on big-endian machines it doesn't.
12588   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12589 }
12590
12591 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12592                                     const TargetLowering &TLI) {
12593   // If this is not a bitcast to an FP type or if the target doesn't have
12594   // IEEE754-compliant FP logic, we're done.
12595   EVT VT = N->getValueType(0);
12596   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12597     return SDValue();
12598
12599   // TODO: Handle cases where the integer constant is a different scalar
12600   // bitwidth to the FP.
12601   SDValue N0 = N->getOperand(0);
12602   EVT SourceVT = N0.getValueType();
12603   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12604     return SDValue();
12605
12606   unsigned FPOpcode;
12607   APInt SignMask;
12608   switch (N0.getOpcode()) {
12609   case ISD::AND:
12610     FPOpcode = ISD::FABS;
12611     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12612     break;
12613   case ISD::XOR:
12614     FPOpcode = ISD::FNEG;
12615     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12616     break;
12617   case ISD::OR:
12618     FPOpcode = ISD::FABS;
12619     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12620     break;
12621   default:
12622     return SDValue();
12623   }
12624
12625   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12626   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12627   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12628   //   fneg (fabs X)
12629   SDValue LogicOp0 = N0.getOperand(0);
12630   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12631   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12632       LogicOp0.getOpcode() == ISD::BITCAST &&
12633       LogicOp0.getOperand(0).getValueType() == VT) {
12634     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12635     NumFPLogicOpsConv++;
12636     if (N0.getOpcode() == ISD::OR)
12637       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12638     return FPOp;
12639   }
12640
12641   return SDValue();
12642 }
12643
12644 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12645   SDValue N0 = N->getOperand(0);
12646   EVT VT = N->getValueType(0);
12647
12648   if (N0.isUndef())
12649     return DAG.getUNDEF(VT);
12650
12651   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12652   // Only do this before legalize types, unless both types are integer and the
12653   // scalar type is legal. Only do this before legalize ops, since the target
12654   // maybe depending on the bitcast.
12655   // First check to see if this is all constant.
12656   // TODO: Support FP bitcasts after legalize types.
12657   if (VT.isVector() &&
12658       (!LegalTypes ||
12659        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12660         TLI.isTypeLegal(VT.getVectorElementType()))) &&
12661       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12662       cast<BuildVectorSDNode>(N0)->isConstant())
12663     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12664                                              VT.getVectorElementType());
12665
12666   // If the input is a constant, let getNode fold it.
12667   if (isIntOrFPConstant(N0)) {
12668     // If we can't allow illegal operations, we need to check that this is just
12669     // a fp -> int or int -> conversion and that the resulting operation will
12670     // be legal.
12671     if (!LegalOperations ||
12672         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12673          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12674         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12675          TLI.isOperationLegal(ISD::Constant, VT))) {
12676       SDValue C = DAG.getBitcast(VT, N0);
12677       if (C.getNode() != N)
12678         return C;
12679     }
12680   }
12681
12682   // (conv (conv x, t1), t2) -> (conv x, t2)
12683   if (N0.getOpcode() == ISD::BITCAST)
12684     return DAG.getBitcast(VT, N0.getOperand(0));
12685
12686   // fold (conv (load x)) -> (load (conv*)x)
12687   // If the resultant load doesn't need a higher alignment than the original!
12688   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12689       // Do not remove the cast if the types differ in endian layout.
12690       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12691           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12692       // If the load is volatile, we only want to change the load type if the
12693       // resulting load is legal. Otherwise we might increase the number of
12694       // memory accesses. We don't care if the original type was legal or not
12695       // as we assume software couldn't rely on the number of accesses of an
12696       // illegal type.
12697       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12698        TLI.isOperationLegal(ISD::LOAD, VT))) {
12699     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12700
12701     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12702                                     *LN0->getMemOperand())) {
12703       SDValue Load =
12704           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12705                       LN0->getPointerInfo(), LN0->getAlign(),
12706                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12707       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12708       return Load;
12709     }
12710   }
12711
12712   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12713     return V;
12714
12715   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12716   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12717   //
12718   // For ppc_fp128:
12719   // fold (bitcast (fneg x)) ->
12720   //     flipbit = signbit
12721   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12722   //
12723   // fold (bitcast (fabs x)) ->
12724   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
12725   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12726   // This often reduces constant pool loads.
12727   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12728        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12729       N0.getNode()->hasOneUse() && VT.isInteger() &&
12730       !VT.isVector() && !N0.getValueType().isVector()) {
12731     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12732     AddToWorklist(NewConv.getNode());
12733
12734     SDLoc DL(N);
12735     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12736       assert(VT.getSizeInBits() == 128);
12737       SDValue SignBit = DAG.getConstant(
12738           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12739       SDValue FlipBit;
12740       if (N0.getOpcode() == ISD::FNEG) {
12741         FlipBit = SignBit;
12742         AddToWorklist(FlipBit.getNode());
12743       } else {
12744         assert(N0.getOpcode() == ISD::FABS);
12745         SDValue Hi =
12746             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12747                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12748                                               SDLoc(NewConv)));
12749         AddToWorklist(Hi.getNode());
12750         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12751         AddToWorklist(FlipBit.getNode());
12752       }
12753       SDValue FlipBits =
12754           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12755       AddToWorklist(FlipBits.getNode());
12756       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12757     }
12758     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12759     if (N0.getOpcode() == ISD::FNEG)
12760       return DAG.getNode(ISD::XOR, DL, VT,
12761                          NewConv, DAG.getConstant(SignBit, DL, VT));
12762     assert(N0.getOpcode() == ISD::FABS);
12763     return DAG.getNode(ISD::AND, DL, VT,
12764                        NewConv, DAG.getConstant(~SignBit, DL, VT));
12765   }
12766
12767   // fold (bitconvert (fcopysign cst, x)) ->
12768   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
12769   // Note that we don't handle (copysign x, cst) because this can always be
12770   // folded to an fneg or fabs.
12771   //
12772   // For ppc_fp128:
12773   // fold (bitcast (fcopysign cst, x)) ->
12774   //     flipbit = (and (extract_element
12775   //                     (xor (bitcast cst), (bitcast x)), 0),
12776   //                    signbit)
12777   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
12778   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12779       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12780       VT.isInteger() && !VT.isVector()) {
12781     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12782     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12783     if (isTypeLegal(IntXVT)) {
12784       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12785       AddToWorklist(X.getNode());
12786
12787       // If X has a different width than the result/lhs, sext it or truncate it.
12788       unsigned VTWidth = VT.getSizeInBits();
12789       if (OrigXWidth < VTWidth) {
12790         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12791         AddToWorklist(X.getNode());
12792       } else if (OrigXWidth > VTWidth) {
12793         // To get the sign bit in the right place, we have to shift it right
12794         // before truncating.
12795         SDLoc DL(X);
12796         X = DAG.getNode(ISD::SRL, DL,
12797                         X.getValueType(), X,
12798                         DAG.getConstant(OrigXWidth-VTWidth, DL,
12799                                         X.getValueType()));
12800         AddToWorklist(X.getNode());
12801         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12802         AddToWorklist(X.getNode());
12803       }
12804
12805       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12806         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12807         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12808         AddToWorklist(Cst.getNode());
12809         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12810         AddToWorklist(X.getNode());
12811         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12812         AddToWorklist(XorResult.getNode());
12813         SDValue XorResult64 = DAG.getNode(
12814             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12815             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12816                                   SDLoc(XorResult)));
12817         AddToWorklist(XorResult64.getNode());
12818         SDValue FlipBit =
12819             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12820                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12821         AddToWorklist(FlipBit.getNode());
12822         SDValue FlipBits =
12823             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12824         AddToWorklist(FlipBits.getNode());
12825         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12826       }
12827       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12828       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12829                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
12830       AddToWorklist(X.getNode());
12831
12832       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12833       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12834                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12835       AddToWorklist(Cst.getNode());
12836
12837       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12838     }
12839   }
12840
12841   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12842   if (N0.getOpcode() == ISD::BUILD_PAIR)
12843     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12844       return CombineLD;
12845
12846   // Remove double bitcasts from shuffles - this is often a legacy of
12847   // XformToShuffleWithZero being used to combine bitmaskings (of
12848   // float vectors bitcast to integer vectors) into shuffles.
12849   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12850   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12851       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12852       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12853       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12854     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12855
12856     // If operands are a bitcast, peek through if it casts the original VT.
12857     // If operands are a constant, just bitcast back to original VT.
12858     auto PeekThroughBitcast = [&](SDValue Op) {
12859       if (Op.getOpcode() == ISD::BITCAST &&
12860           Op.getOperand(0).getValueType() == VT)
12861         return SDValue(Op.getOperand(0));
12862       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12863           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12864         return DAG.getBitcast(VT, Op);
12865       return SDValue();
12866     };
12867
12868     // FIXME: If either input vector is bitcast, try to convert the shuffle to
12869     // the result type of this bitcast. This would eliminate at least one
12870     // bitcast. See the transform in InstCombine.
12871     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12872     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12873     if (!(SV0 && SV1))
12874       return SDValue();
12875
12876     int MaskScale =
12877         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12878     SmallVector<int, 8> NewMask;
12879     for (int M : SVN->getMask())
12880       for (int i = 0; i != MaskScale; ++i)
12881         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12882
12883     SDValue LegalShuffle =
12884         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12885     if (LegalShuffle)
12886       return LegalShuffle;
12887   }
12888
12889   return SDValue();
12890 }
12891
12892 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12893   EVT VT = N->getValueType(0);
12894   return CombineConsecutiveLoads(N, VT);
12895 }
12896
12897 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12898   SDValue N0 = N->getOperand(0);
12899
12900   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
12901     return N0;
12902
12903   return SDValue();
12904 }
12905
12906 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12907 /// operands. DstEltVT indicates the destination element value type.
12908 SDValue DAGCombiner::
12909 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12910   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12911
12912   // If this is already the right type, we're done.
12913   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12914
12915   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12916   unsigned DstBitSize = DstEltVT.getSizeInBits();
12917
12918   // If this is a conversion of N elements of one type to N elements of another
12919   // type, convert each element.  This handles FP<->INT cases.
12920   if (SrcBitSize == DstBitSize) {
12921     SmallVector<SDValue, 8> Ops;
12922     for (SDValue Op : BV->op_values()) {
12923       // If the vector element type is not legal, the BUILD_VECTOR operands
12924       // are promoted and implicitly truncated.  Make that explicit here.
12925       if (Op.getValueType() != SrcEltVT)
12926         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12927       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12928       AddToWorklist(Ops.back().getNode());
12929     }
12930     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12931                               BV->getValueType(0).getVectorNumElements());
12932     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12933   }
12934
12935   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12936   // handle annoying details of growing/shrinking FP values, we convert them to
12937   // int first.
12938   if (SrcEltVT.isFloatingPoint()) {
12939     // Convert the input float vector to a int vector where the elements are the
12940     // same sizes.
12941     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12942     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12943     SrcEltVT = IntVT;
12944   }
12945
12946   // Now we know the input is an integer vector.  If the output is a FP type,
12947   // convert to integer first, then to FP of the right size.
12948   if (DstEltVT.isFloatingPoint()) {
12949     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12950     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12951
12952     // Next, convert to FP elements of the same size.
12953     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12954   }
12955
12956   SDLoc DL(BV);
12957
12958   // Okay, we know the src/dst types are both integers of differing types.
12959   // Handling growing first.
12960   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12961   if (SrcBitSize < DstBitSize) {
12962     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12963
12964     SmallVector<SDValue, 8> Ops;
12965     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12966          i += NumInputsPerOutput) {
12967       bool isLE = DAG.getDataLayout().isLittleEndian();
12968       APInt NewBits = APInt(DstBitSize, 0);
12969       bool EltIsUndef = true;
12970       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12971         // Shift the previously computed bits over.
12972         NewBits <<= SrcBitSize;
12973         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12974         if (Op.isUndef()) continue;
12975         EltIsUndef = false;
12976
12977         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12978                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12979       }
12980
12981       if (EltIsUndef)
12982         Ops.push_back(DAG.getUNDEF(DstEltVT));
12983       else
12984         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12985     }
12986
12987     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12988     return DAG.getBuildVector(VT, DL, Ops);
12989   }
12990
12991   // Finally, this must be the case where we are shrinking elements: each input
12992   // turns into multiple outputs.
12993   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12994   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12995                             NumOutputsPerInput*BV->getNumOperands());
12996   SmallVector<SDValue, 8> Ops;
12997
12998   for (const SDValue &Op : BV->op_values()) {
12999     if (Op.isUndef()) {
13000       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
13001       continue;
13002     }
13003
13004     APInt OpVal = cast<ConstantSDNode>(Op)->
13005                   getAPIntValue().zextOrTrunc(SrcBitSize);
13006
13007     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
13008       APInt ThisVal = OpVal.trunc(DstBitSize);
13009       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
13010       OpVal.lshrInPlace(DstBitSize);
13011     }
13012
13013     // For big endian targets, swap the order of the pieces of each element.
13014     if (DAG.getDataLayout().isBigEndian())
13015       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
13016   }
13017
13018   return DAG.getBuildVector(VT, DL, Ops);
13019 }
13020
13021 /// Try to perform FMA combining on a given FADD node.
      ///
      /// Searches the operands of \p N for an FMUL (directly, behind an FP_EXTEND,
      /// or as the addend of an existing fused node) and, when the target hooks and
      /// the fast-math settings checked below permit it, rewrites the pattern into
      /// ISD::FMAD or ISD::FMA nodes.
      ///
      /// \param N the FADD node to combine.
      /// \returns the fused replacement value, or an empty SDValue if no fold
      ///          applied.
13022 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13023   SDValue N0 = N->getOperand(0);
13024   SDValue N1 = N->getOperand(1);
13025   EVT VT = N->getValueType(0);
13026   SDLoc SL(N);
13027
13028   const TargetOptions &Options = DAG.getTarget().Options;
13029
13030   // Floating-point multiply-add with intermediate rounding.
        // Only considered when LegalOperations is set.
13031   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13032
13033   // Floating-point multiply-add without intermediate rounding.
        // The target must report FMA as faster than FMUL + FADD; when
        // LegalOperations is set, FMA must additionally be legal or custom for VT.
13034   bool HasFMA =
13035       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13036       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13037
13038   // No valid opcode, do not combine.
13039   if (!HasFMAD && !HasFMA)
13040     return SDValue();
13041
        // Reassociation is needed only by the nested-FMA fold further down; it may
        // come from the global option or from this node's own flag.
13042   bool CanReassociate =
13043       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
        // Fusion is allowed for every node when AllowFPOpFusion is Fast, when
        // UnsafeFPMath is set, or when FMAD is available; otherwise each node must
        // carry the allow-contract flag itself.
13044   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13045                               Options.UnsafeFPMath || HasFMAD);
13046   // If the addition is not contractable, do not combine.
13047   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13048     return SDValue();
13049
        // NOTE(review): presumably the target forms FMAs later in the machine
        // combiner when this hook returns true, so nothing is fused here.
13050   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13051     return SDValue();
13052
13053   // Always prefer FMAD to FMA for precision.
13054   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13055   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13056
13057   // Is the node an FMUL and contractable either due to global flags or
13058   // SDNodeFlags.
13059   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13060     if (N.getOpcode() != ISD::FMUL)
13061       return false;
13062     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13063   };
13064   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13065   // prefer to fold the multiply with fewer uses.
        // After the swap N0 is the multiply with the smaller use count, and the
        // N0 fold below is the first one attempted.
13066   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13067     if (N0.getNode()->use_size() > N1.getNode()->use_size())
13068       std::swap(N0, N1);
13069   }
13070
13071   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
        // Without aggressive fusion only a single-use FMUL is fused; a multiply
        // with other users would still be needed by them after the fold.
13072   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13073     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13074                        N0.getOperand(1), N1);
13075   }
13076
13077   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13078   // Note: Commutes FADD operands.
13079   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13080     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13081                        N1.getOperand(1), N0);
13082   }
13083
13084   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13085   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13086   // This requires reassociation because it changes the order of operations.
        // Both the fused node and its FMUL addend must have a single use. The
        // addend is matched by opcode only (isContractableFMUL is not consulted).
13087   SDValue FMA, E;
13088   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
13089       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13090       N0.getOperand(2).hasOneUse()) {
13091     FMA = N0;
13092     E = N1;
13093   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
13094              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13095              N1.getOperand(2).hasOneUse()) {
13096     FMA = N1;
13097     E = N0;
13098   }
        // FMA and E are assigned together by whichever branch matched above and
        // are both left empty otherwise.
13099   if (FMA && E) {
13100     SDValue A = FMA.getOperand(0);
13101     SDValue B = FMA.getOperand(1);
13102     SDValue C = FMA.getOperand(2).getOperand(0);
13103     SDValue D = FMA.getOperand(2).getOperand(1);
13104     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13105     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13106   }
13107
13108   // Look through FP_EXTEND nodes to do more combining.
        // Each of these folds asks the target (isFPExtFoldable) whether the
        // extension from the narrower multiply type can be folded into the fused
        // operation; no one-use check is made on the extended FMUL.
13109
13110   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13111   if (N0.getOpcode() == ISD::FP_EXTEND) {
13112     SDValue N00 = N0.getOperand(0);
13113     if (isContractableFMUL(N00) &&
13114         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13115                             N00.getValueType())) {
13116       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13117                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13118                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13119                          N1);
13120     }
13121   }
13122
13123   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13124   // Note: Commutes FADD operands.
13125   if (N1.getOpcode() == ISD::FP_EXTEND) {
13126     SDValue N10 = N1.getOperand(0);
13127     if (isContractableFMUL(N10) &&
13128         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13129                             N10.getValueType())) {
13130       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13131                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13132                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13133                          N0);
13134     }
13135   }
13136
13137   // More folding opportunities when target permits.
        // These folds look one level deeper (a fused node combined with an
        // FP_EXTEND) and do not check use counts or CanReassociate.
13138   if (Aggressive) {
13139     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13140     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
          // Shared builder for this pattern and its commuted form below.
13141     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13142                                     SDValue Z) {
13143       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13144                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13145                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13146                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13147                                      Z));
13148     };
13149     if (N0.getOpcode() == PreferredFusedOpcode) {
13150       SDValue N02 = N0.getOperand(2);
13151       if (N02.getOpcode() == ISD::FP_EXTEND) {
13152         SDValue N020 = N02.getOperand(0);
13153         if (isContractableFMUL(N020) &&
13154             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13155                                 N020.getValueType())) {
13156           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13157                                       N020.getOperand(0), N020.getOperand(1),
13158                                       N1);
13159         }
13160       }
13161     }
13162
13163     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13164     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13165     // FIXME: This turns two single-precision and one double-precision
13166     // operation into two double-precision operations, which might not be
13167     // interesting for all targets, especially GPUs.
          // Shared builder for this pattern and its commuted form below.
13168     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13169                                     SDValue Z) {
13170       return DAG.getNode(
13171           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13172           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13173           DAG.getNode(PreferredFusedOpcode, SL, VT,
13174                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13175                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13176     };
13177     if (N0.getOpcode() == ISD::FP_EXTEND) {
13178       SDValue N00 = N0.getOperand(0);
13179       if (N00.getOpcode() == PreferredFusedOpcode) {
13180         SDValue N002 = N00.getOperand(2);
13181         if (isContractableFMUL(N002) &&
13182             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13183                                 N00.getValueType())) {
13184           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13185                                       N002.getOperand(0), N002.getOperand(1),
13186                                       N1);
13187         }
13188       }
13189     }
13190
13191     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13192     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13193     if (N1.getOpcode() == PreferredFusedOpcode) {
13194       SDValue N12 = N1.getOperand(2);
13195       if (N12.getOpcode() == ISD::FP_EXTEND) {
13196         SDValue N120 = N12.getOperand(0);
13197         if (isContractableFMUL(N120) &&
13198             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13199                                 N120.getValueType())) {
13200           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13201                                       N120.getOperand(0), N120.getOperand(1),
13202                                       N0);
13203         }
13204       }
13205     }
13206
13207     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13208     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13209     // FIXME: This turns two single-precision and one double-precision
13210     // operation into two double-precision operations, which might not be
13211     // interesting for all targets, especially GPUs.
13212     if (N1.getOpcode() == ISD::FP_EXTEND) {
13213       SDValue N10 = N1.getOperand(0);
13214       if (N10.getOpcode() == PreferredFusedOpcode) {
13215         SDValue N102 = N10.getOperand(2);
13216         if (isContractableFMUL(N102) &&
13217             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13218                                 N10.getValueType())) {
13219           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13220                                       N102.getOperand(0), N102.getOperand(1),
13221                                       N0);
13222         }
13223       }
13224     }
13225   }
13226
        // No fold matched.
13227   return SDValue();
13228 }
13229
13230 /// Try to perform FMA combining on a given FSUB node.
      ///
      /// Searches the operands of \p N for an FMUL (directly, behind FNEG and/or
      /// FP_EXTEND, or as the addend of an existing fused node) and, when the
      /// target hooks and the fast-math settings checked below permit it, rewrites
      /// the pattern into ISD::FMAD or ISD::FMA nodes, inserting FNEG nodes where
      /// the subtraction requires them.
      ///
      /// \param N the FSUB node to combine.
      /// \returns the fused replacement value, or an empty SDValue if no fold
      ///          applied.
13231 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13232   SDValue N0 = N->getOperand(0);
13233   SDValue N1 = N->getOperand(1);
13234   EVT VT = N->getValueType(0);
13235   SDLoc SL(N);
13236
13237   const TargetOptions &Options = DAG.getTarget().Options;
13238   // Floating-point multiply-add with intermediate rounding.
        // Only considered when LegalOperations is set.
13239   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13240
13241   // Floating-point multiply-add without intermediate rounding.
        // The target must report FMA as faster than FMUL + FADD; when
        // LegalOperations is set, FMA must additionally be legal or custom for VT.
13242   bool HasFMA =
13243       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13244       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13245
13246   // No valid opcode, do not combine.
13247   if (!HasFMAD && !HasFMA)
13248     return SDValue();
13249
13250   const SDNodeFlags Flags = N->getFlags();
        // Fusion is allowed for every node when AllowFPOpFusion is Fast, when
        // UnsafeFPMath is set, or when FMAD is available; otherwise each node must
        // carry the allow-contract flag itself.
13251   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13252                               Options.UnsafeFPMath || HasFMAD);
13253
13254   // If the subtraction is not contractable, do not combine.
13255   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13256     return SDValue();
13257
        // NOTE(review): presumably the target forms FMAs later in the machine
        // combiner when this hook returns true, so nothing is fused here.
13258   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13259     return SDValue();
13260
13261   // Always prefer FMAD to FMA for precision.
13262   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13263   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
        // Signed zeros may be ignored either globally or via this node's flag.
13264   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13265
13266   // Is the node an FMUL and contractable either due to global flags or
13267   // SDNodeFlags.
13268   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13269     if (N.getOpcode() != ISD::FMUL)
13270       return false;
13271     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13272   };
13273
13274   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
        // Without aggressive fusion only a single-use FMUL is fused; a multiply
        // with other users would still be needed by them after the fold.
13275   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13276     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13277       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13278                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13279     }
13280     return SDValue();
13281   };
13282
13283   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13284   // Note: Commutes FSUB operands.
13285   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13286     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13287       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13288                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13289                          YZ.getOperand(1), X);
13290     }
13291     return SDValue();
13292   };
13293
13294   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13295   // prefer to fold the multiply with fewer uses.
        // When N0 has more uses than N1, fusing N1 is attempted first; in every
        // other case fusing N0 is attempted first.
13296   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13297       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13298     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13299     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13300       return V;
13301     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13302     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13303       return V;
13304   } else {
13305     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13306     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13307       return V;
13308     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13309     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13310       return V;
13311   }
13312
13313   // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
        // Without aggressive fusion both the FNEG and the FMUL under it must have
        // a single use.
13314   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13315       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13316     SDValue N00 = N0.getOperand(0).getOperand(0);
13317     SDValue N01 = N0.getOperand(0).getOperand(1);
13318     return DAG.getNode(PreferredFusedOpcode, SL, VT,
13319                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13320                        DAG.getNode(ISD::FNEG, SL, VT, N1));
13321   }
13322
13323   // Look through FP_EXTEND nodes to do more combining.
        // Each of these folds asks the target (isFPExtFoldable) whether the
        // extension from the narrower multiply type can be folded into the fused
        // operation; no one-use check is made on the extended FMUL.
13324
13325   // fold (fsub (fpext (fmul x, y)), z)
13326   //   -> (fma (fpext x), (fpext y), (fneg z))
13327   if (N0.getOpcode() == ISD::FP_EXTEND) {
13328     SDValue N00 = N0.getOperand(0);
13329     if (isContractableFMUL(N00) &&
13330         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13331                             N00.getValueType())) {
13332       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13333                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13334                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13335                          DAG.getNode(ISD::FNEG, SL, VT, N1));
13336     }
13337   }
13338
13339   // fold (fsub x, (fpext (fmul y, z)))
13340   //   -> (fma (fneg (fpext y)), (fpext z), x)
13341   // Note: Commutes FSUB operands.
13342   if (N1.getOpcode() == ISD::FP_EXTEND) {
13343     SDValue N10 = N1.getOperand(0);
13344     if (isContractableFMUL(N10) &&
13345         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13346                             N10.getValueType())) {
13347       return DAG.getNode(
13348           PreferredFusedOpcode, SL, VT,
13349           DAG.getNode(ISD::FNEG, SL, VT,
13350                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13351           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13352     }
13353   }
13354
13355   // fold (fsub (fpext (fneg (fmul x, y))), z)
13356   //   -> (fneg (fma (fpext x), (fpext y), z))
13357   // Note: This could be removed with appropriate canonicalization of the
13358   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13359   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13360   // us from implementing the canonicalization in visitFSUB.
13361   if (N0.getOpcode() == ISD::FP_EXTEND) {
13362     SDValue N00 = N0.getOperand(0);
13363     if (N00.getOpcode() == ISD::FNEG) {
13364       SDValue N000 = N00.getOperand(0);
13365       if (isContractableFMUL(N000) &&
13366           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13367                               N00.getValueType())) {
13368         return DAG.getNode(
13369             ISD::FNEG, SL, VT,
13370             DAG.getNode(PreferredFusedOpcode, SL, VT,
13371                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13372                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13373                         N1));
13374       }
13375     }
13376   }
13377
13378   // fold (fsub (fneg (fpext (fmul x, y))), z)
13379   //   -> (fneg (fma (fpext x), (fpext y), z))
13380   // Note: This could be removed with appropriate canonicalization of the
13381   // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
13382   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
13383   // us from implementing the canonicalization in visitFSUB.
13384   if (N0.getOpcode() == ISD::FNEG) {
13385     SDValue N00 = N0.getOperand(0);
13386     if (N00.getOpcode() == ISD::FP_EXTEND) {
13387       SDValue N000 = N00.getOperand(0);
13388       if (isContractableFMUL(N000) &&
13389           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13390                               N000.getValueType())) {
13391         return DAG.getNode(
13392             ISD::FNEG, SL, VT,
13393             DAG.getNode(PreferredFusedOpcode, SL, VT,
13394                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13395                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13396                         N1));
13397       }
13398     }
13399   }
13400
        // Options is a reference to the target's TargetOptions. Capture it by
        // reference: a by-copy capture would copy the whole structure every time
        // this function runs. The lambda never outlives this function.
13401   auto isReassociable = [&Options](SDNode *N) {
13402     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13403   };
13404
        // An FMUL that may be both fused and reassociated. The helper lambdas are
        // captured by reference for the same reason as above.
13405   auto isContractableAndReassociableFMUL = [&isContractableFMUL,
13406                                             &isReassociable](SDValue N) {
13407     return isContractableFMUL(N) && isReassociable(N.getNode());
13408   };
13409
13410   // More folding opportunities when target permits.
        // All folds in this section require the FSUB itself to be reassociable.
13411   if (Aggressive && isReassociable(N)) {
          // Unlike AllowFusionGlobally, CanFuse considers only UnsafeFPMath and this
          // node's allow-contract flag. It gates the first two folds below.
13412     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
13413     // fold (fsub (fma x, y, (fmul u, v)), z)
13414     //   -> (fma x, y, (fma u, v, (fneg z)))
13415     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13416         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
13417         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
13418       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13419                          N0.getOperand(1),
13420                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13421                                      N0.getOperand(2).getOperand(0),
13422                                      N0.getOperand(2).getOperand(1),
13423                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
13424     }
13425
13426     // fold (fsub x, (fma y, z, (fmul u, v)))
13427     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
          // This fold is additionally gated on NoSignedZero.
13428     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13429         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
13430         N1->hasOneUse() && NoSignedZero) {
13431       SDValue N20 = N1.getOperand(2).getOperand(0);
13432       SDValue N21 = N1.getOperand(2).getOperand(1);
13433       return DAG.getNode(
13434           PreferredFusedOpcode, SL, VT,
13435           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13436           DAG.getNode(PreferredFusedOpcode, SL, VT,
13437                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
13438     }
13439
13440     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
13441     //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
13442     if (N0.getOpcode() == PreferredFusedOpcode &&
13443         N0->hasOneUse()) {
13444       SDValue N02 = N0.getOperand(2);
13445       if (N02.getOpcode() == ISD::FP_EXTEND) {
13446         SDValue N020 = N02.getOperand(0);
13447         if (isContractableAndReassociableFMUL(N020) &&
13448             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13449                                 N020.getValueType())) {
13450           return DAG.getNode(
13451               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13452               DAG.getNode(
13453                   PreferredFusedOpcode, SL, VT,
13454                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13455                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13456                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13457         }
13458       }
13459     }
13460
13461     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13462     //   -> (fma (fpext x), (fpext y),
13463     //           (fma (fpext u), (fpext v), (fneg z)))
13464     // FIXME: This turns two single-precision and one double-precision
13465     // operation into two double-precision operations, which might not be
13466     // interesting for all targets, especially GPUs.
13467     if (N0.getOpcode() == ISD::FP_EXTEND) {
13468       SDValue N00 = N0.getOperand(0);
13469       if (N00.getOpcode() == PreferredFusedOpcode) {
13470         SDValue N002 = N00.getOperand(2);
13471         if (isContractableAndReassociableFMUL(N002) &&
13472             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13473                                 N00.getValueType())) {
13474           return DAG.getNode(
13475               PreferredFusedOpcode, SL, VT,
13476               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13477               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13478               DAG.getNode(
13479                   PreferredFusedOpcode, SL, VT,
13480                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13481                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13482                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13483         }
13484       }
13485     }
13486
13487     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13488     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13489     if (N1.getOpcode() == PreferredFusedOpcode &&
13490         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13491         N1->hasOneUse()) {
13492       SDValue N120 = N1.getOperand(2).getOperand(0);
13493       if (isContractableAndReassociableFMUL(N120) &&
13494           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13495                               N120.getValueType())) {
13496         SDValue N1200 = N120.getOperand(0);
13497         SDValue N1201 = N120.getOperand(1);
13498         return DAG.getNode(
13499             PreferredFusedOpcode, SL, VT,
13500             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13501             DAG.getNode(PreferredFusedOpcode, SL, VT,
13502                         DAG.getNode(ISD::FNEG, SL, VT,
13503                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13504                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13505       }
13506     }
13507
13508     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13509     //   -> (fma (fneg (fpext y)), (fpext z),
13510     //           (fma (fneg (fpext u)), (fpext v), x))
13511     // FIXME: This turns two single-precision and one double-precision
13512     // operation into two double-precision operations, which might not be
13513     // interesting for all targets, especially GPUs.
13514     if (N1.getOpcode() == ISD::FP_EXTEND &&
13515         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13516       SDValue CvtSrc = N1.getOperand(0);
13517       SDValue N100 = CvtSrc.getOperand(0);
13518       SDValue N101 = CvtSrc.getOperand(1);
13519       SDValue N102 = CvtSrc.getOperand(2);
13520       if (isContractableAndReassociableFMUL(N102) &&
13521           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13522                               CvtSrc.getValueType())) {
13523         SDValue N1020 = N102.getOperand(0);
13524         SDValue N1021 = N102.getOperand(1);
13525         return DAG.getNode(
13526             PreferredFusedOpcode, SL, VT,
13527             DAG.getNode(ISD::FNEG, SL, VT,
13528                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13529             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13530             DAG.getNode(PreferredFusedOpcode, SL, VT,
13531                         DAG.getNode(ISD::FNEG, SL, VT,
13532                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13533                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13534       }
13535     }
13536   }
13537
        // No fold matched.
13538   return SDValue();
13539 }
13540
13541 /// Try to perform FMA combining on a given FMUL node based on the distributive
13542 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13543 /// subtraction instead of addition).
      ///
      /// Only the global TargetOptions are consulted here (NoInfsFPMath,
      /// AllowFPOpFusion, UnsafeFPMath); the node's own fast-math flags are not
      /// read.
      ///
      /// \param N the FMUL node to combine (asserted).
      /// \returns the fused replacement value, or an empty SDValue if no fold
      ///          applied.
13544 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13545   SDValue N0 = N->getOperand(0);
13546   SDValue N1 = N->getOperand(1);
13547   EVT VT = N->getValueType(0);
13548   SDLoc SL(N);
13549
13550   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13551
13552   const TargetOptions &Options = DAG.getTarget().Options;
13553
13554   // The transforms below are incorrect when x == 0 and y == inf, because the
13555   // intermediate multiplication produces a nan.
13556   if (!Options.NoInfsFPMath)
13557     return SDValue();
13558
13559   // Floating-point multiply-add without intermediate rounding.
        // Requires fusion to be permitted globally (AllowFPOpFusion is Fast or
        // UnsafeFPMath is set) in addition to the target/legality checks.
13560   bool HasFMA =
13561       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13562       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13563       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13564
13565   // Floating-point multiply-add with intermediate rounding. This can result
13566   // in a less precise result due to the changed rounding order.
        // Hence it is only used under UnsafeFPMath, and only when LegalOperations
        // is set.
13567   bool HasFMAD = Options.UnsafeFPMath &&
13568                  (LegalOperations && TLI.isFMADLegal(DAG, N));
13569
13570   // No valid opcode, do not combine.
13571   if (!HasFMAD && !HasFMA)
13572     return SDValue();
13573
13574   // Always prefer FMAD to FMA for precision.
13575   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13576   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13577
13578   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13579   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
        // X is the candidate FADD operand of the FMUL and Y is the other operand.
        // Only operand 1 of the FADD is inspected for the +/-1.0 constant (scalar
        // or splat). Without aggressive fusion the FADD must have a single use.
13580   auto FuseFADD = [&](SDValue X, SDValue Y) {
13581     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13582       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13583         if (C->isExactlyValue(+1.0))
13584           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13585                              Y);
13586         if (C->isExactlyValue(-1.0))
13587           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13588                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13589       }
13590     }
13591     return SDValue();
13592   };
13593
        // Either operand of the FMUL may be the FADD, so try both orders.
13594   if (SDValue FMA = FuseFADD(N0, N1))
13595     return FMA;
13596   if (SDValue FMA = FuseFADD(N1, N0))
13597     return FMA;
13598
13599   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13600   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13601   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13602   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
        // FSUB is not commutative, so the constant is looked for in operand 0
        // first and then in operand 1, each with its own pair of folds.
13603   auto FuseFSUB = [&](SDValue X, SDValue Y) {
13604     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13605       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13606         if (C0->isExactlyValue(+1.0))
13607           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13608                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13609                              Y);
13610         if (C0->isExactlyValue(-1.0))
13611           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13612                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13613                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13614       }
13615       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13616         if (C1->isExactlyValue(+1.0))
13617           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13618                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13619         if (C1->isExactlyValue(-1.0))
13620           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13621                              Y);
13622       }
13623     }
13624     return SDValue();
13625   };
13626
        // Either operand of the FMUL may be the FSUB, so try both orders.
13627   if (SDValue FMA = FuseFSUB(N0, N1))
13628     return FMA;
13629   if (SDValue FMA = FuseFSUB(N1, N0))
13630     return FMA;
13631
        // No fold matched.
13632   return SDValue();
13633 }
13634
13635 SDValue DAGCombiner::visitFADD(SDNode *N) {
13636   SDValue N0 = N->getOperand(0);
13637   SDValue N1 = N->getOperand(1);
13638   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13639   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13640   EVT VT = N->getValueType(0);
13641   SDLoc DL(N);
13642   const TargetOptions &Options = DAG.getTarget().Options;
13643   SDNodeFlags Flags = N->getFlags();
13644   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13645
13646   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13647     return R;
13648
13649   // fold vector ops
13650   if (VT.isVector())
13651     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13652       return FoldedVOp;
13653
13654   // fold (fadd c1, c2) -> c1 + c2
13655   if (N0CFP && N1CFP)
13656     return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13657
13658   // canonicalize constant to RHS
13659   if (N0CFP && !N1CFP)
13660     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13661
13662   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13663   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13664   if (N1C && N1C->isZero())
13665     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
13666       return N0;
13667
13668   if (SDValue NewSel = foldBinOpIntoSelect(N))
13669     return NewSel;
13670
13671   // fold (fadd A, (fneg B)) -> (fsub A, B)
13672   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13673     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13674             N1, DAG, LegalOperations, ForCodeSize))
13675       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13676
13677   // fold (fadd (fneg A), B) -> (fsub B, A)
13678   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13679     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13680             N0, DAG, LegalOperations, ForCodeSize))
13681       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13682
13683   auto isFMulNegTwo = [](SDValue FMul) {
13684     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13685       return false;
13686     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13687     return C && C->isExactlyValue(-2.0);
13688   };
13689
13690   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13691   if (isFMulNegTwo(N0)) {
13692     SDValue B = N0.getOperand(0);
13693     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13694     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13695   }
13696   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13697   if (isFMulNegTwo(N1)) {
13698     SDValue B = N1.getOperand(0);
13699     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13700     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13701   }
13702
13703   // No FP constant should be created after legalization as Instruction
13704   // Selection pass has a hard time dealing with FP constants.
13705   bool AllowNewConst = (Level < AfterLegalizeDAG);
13706
13707   // If nnan is enabled, fold lots of things.
13708   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13709     // If allowed, fold (fadd (fneg x), x) -> 0.0
13710     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13711       return DAG.getConstantFP(0.0, DL, VT);
13712
13713     // If allowed, fold (fadd x, (fneg x)) -> 0.0
13714     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13715       return DAG.getConstantFP(0.0, DL, VT);
13716   }
13717
13718   // If 'unsafe math' or reassoc and nsz, fold lots of things.
13719   // TODO: break out portions of the transformations below for which Unsafe is
13720   //       considered and which do not require both nsz and reassoc
13721   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13722        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13723       AllowNewConst) {
13724     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13725     if (N1CFP && N0.getOpcode() == ISD::FADD &&
13726         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13727       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13728       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13729     }
13730
13731     // We can fold chains of FADD's of the same value into multiplications.
13732     // This transform is not safe in general because we are reducing the number
13733     // of rounding steps.
13734     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13735       if (N0.getOpcode() == ISD::FMUL) {
13736         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13737         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13738
13739         // (fadd (fmul x, c), x) -> (fmul x, c+1)
13740         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13741           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13742                                        DAG.getConstantFP(1.0, DL, VT));
13743           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13744         }
13745
13746         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13747         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13748             N1.getOperand(0) == N1.getOperand(1) &&
13749             N0.getOperand(0) == N1.getOperand(0)) {
13750           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13751                                        DAG.getConstantFP(2.0, DL, VT));
13752           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13753         }
13754       }
13755
13756       if (N1.getOpcode() == ISD::FMUL) {
13757         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13758         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13759
13760         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13761         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13762           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13763                                        DAG.getConstantFP(1.0, DL, VT));
13764           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13765         }
13766
13767         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13768         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13769             N0.getOperand(0) == N0.getOperand(1) &&
13770             N1.getOperand(0) == N0.getOperand(0)) {
13771           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13772                                        DAG.getConstantFP(2.0, DL, VT));
13773           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13774         }
13775       }
13776
13777       if (N0.getOpcode() == ISD::FADD) {
13778         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13779         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13780         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13781             (N0.getOperand(0) == N1)) {
13782           return DAG.getNode(ISD::FMUL, DL, VT, N1,
13783                              DAG.getConstantFP(3.0, DL, VT));
13784         }
13785       }
13786
13787       if (N1.getOpcode() == ISD::FADD) {
13788         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13789         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13790         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13791             N1.getOperand(0) == N0) {
13792           return DAG.getNode(ISD::FMUL, DL, VT, N0,
13793                              DAG.getConstantFP(3.0, DL, VT));
13794         }
13795       }
13796
13797       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13798       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13799           N0.getOperand(0) == N0.getOperand(1) &&
13800           N1.getOperand(0) == N1.getOperand(1) &&
13801           N0.getOperand(0) == N1.getOperand(0)) {
13802         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13803                            DAG.getConstantFP(4.0, DL, VT));
13804       }
13805     }
13806   } // enable-unsafe-fp-math
13807
13808   // FADD -> FMA combines:
13809   if (SDValue Fused = visitFADDForFMACombine(N)) {
13810     AddToWorklist(Fused.getNode());
13811     return Fused;
13812   }
13813   return SDValue();
13814 }
13815
13816 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13817   SDValue Chain = N->getOperand(0);
13818   SDValue N0 = N->getOperand(1);
13819   SDValue N1 = N->getOperand(2);
13820   EVT VT = N->getValueType(0);
13821   EVT ChainVT = N->getValueType(1);
13822   SDLoc DL(N);
13823   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13824
13825   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13826   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13827     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13828             N1, DAG, LegalOperations, ForCodeSize)) {
13829       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13830                          {Chain, N0, NegN1});
13831     }
13832
13833   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13834   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13835     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13836             N0, DAG, LegalOperations, ForCodeSize)) {
13837       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13838                          {Chain, N1, NegN0});
13839     }
13840   return SDValue();
13841 }
13842
13843 SDValue DAGCombiner::visitFSUB(SDNode *N) {
13844   SDValue N0 = N->getOperand(0);
13845   SDValue N1 = N->getOperand(1);
13846   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13847   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13848   EVT VT = N->getValueType(0);
13849   SDLoc DL(N);
13850   const TargetOptions &Options = DAG.getTarget().Options;
13851   const SDNodeFlags Flags = N->getFlags();
13852   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13853
13854   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13855     return R;
13856
13857   // fold vector ops
13858   if (VT.isVector())
13859     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13860       return FoldedVOp;
13861
13862   // fold (fsub c1, c2) -> c1-c2
13863   if (N0CFP && N1CFP)
13864     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13865
13866   if (SDValue NewSel = foldBinOpIntoSelect(N))
13867     return NewSel;
13868
13869   // (fsub A, 0) -> A
13870   if (N1CFP && N1CFP->isZero()) {
13871     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13872         Flags.hasNoSignedZeros()) {
13873       return N0;
13874     }
13875   }
13876
13877   if (N0 == N1) {
13878     // (fsub x, x) -> 0.0
13879     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13880       return DAG.getConstantFP(0.0f, DL, VT);
13881   }
13882
13883   // (fsub -0.0, N1) -> -N1
13884   if (N0CFP && N0CFP->isZero()) {
13885     if (N0CFP->isNegative() ||
13886         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13887       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13888       // flushed to zero, unless all users treat denorms as zero (DAZ).
13889       // FIXME: This transform will change the sign of a NaN and the behavior
13890       // of a signaling NaN. It is only valid when a NoNaN flag is present.
13891       DenormalMode DenormMode = DAG.getDenormalMode(VT);
13892       if (DenormMode == DenormalMode::getIEEE()) {
13893         if (SDValue NegN1 =
13894                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13895           return NegN1;
13896         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13897           return DAG.getNode(ISD::FNEG, DL, VT, N1);
13898       }
13899     }
13900   }
13901
13902   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13903        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13904       N1.getOpcode() == ISD::FADD) {
13905     // X - (X + Y) -> -Y
13906     if (N0 == N1->getOperand(0))
13907       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13908     // X - (Y + X) -> -Y
13909     if (N0 == N1->getOperand(1))
13910       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13911   }
13912
13913   // fold (fsub A, (fneg B)) -> (fadd A, B)
13914   if (SDValue NegN1 =
13915           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13916     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13917
13918   // FSUB -> FMA combines:
13919   if (SDValue Fused = visitFSUBForFMACombine(N)) {
13920     AddToWorklist(Fused.getNode());
13921     return Fused;
13922   }
13923
13924   return SDValue();
13925 }
13926
13927 SDValue DAGCombiner::visitFMUL(SDNode *N) {
13928   SDValue N0 = N->getOperand(0);
13929   SDValue N1 = N->getOperand(1);
13930   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13931   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13932   EVT VT = N->getValueType(0);
13933   SDLoc DL(N);
13934   const TargetOptions &Options = DAG.getTarget().Options;
13935   const SDNodeFlags Flags = N->getFlags();
13936   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13937
13938   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13939     return R;
13940
13941   // fold vector ops
13942   if (VT.isVector()) {
13943     // This just handles C1 * C2 for vectors. Other vector folds are below.
13944     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13945       return FoldedVOp;
13946   }
13947
13948   // fold (fmul c1, c2) -> c1*c2
13949   if (N0CFP && N1CFP)
13950     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13951
13952   // canonicalize constant to RHS
13953   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13954      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13955     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13956
13957   if (SDValue NewSel = foldBinOpIntoSelect(N))
13958     return NewSel;
13959
13960   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13961     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13962     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13963         N0.getOpcode() == ISD::FMUL) {
13964       SDValue N00 = N0.getOperand(0);
13965       SDValue N01 = N0.getOperand(1);
13966       // Avoid an infinite loop by making sure that N00 is not a constant
13967       // (the inner multiply has not been constant folded yet).
13968       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13969           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13970         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13971         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13972       }
13973     }
13974
13975     // Match a special-case: we convert X * 2.0 into fadd.
13976     // fmul (fadd X, X), C -> fmul X, 2.0 * C
13977     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13978         N0.getOperand(0) == N0.getOperand(1)) {
13979       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13980       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13981       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13982     }
13983   }
13984
13985   // fold (fmul X, 2.0) -> (fadd X, X)
13986   if (N1CFP && N1CFP->isExactlyValue(+2.0))
13987     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13988
13989   // fold (fmul X, -1.0) -> (fneg X)
13990   if (N1CFP && N1CFP->isExactlyValue(-1.0))
13991     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13992       return DAG.getNode(ISD::FNEG, DL, VT, N0);
13993
13994   // -N0 * -N1 --> N0 * N1
13995   TargetLowering::NegatibleCost CostN0 =
13996       TargetLowering::NegatibleCost::Expensive;
13997   TargetLowering::NegatibleCost CostN1 =
13998       TargetLowering::NegatibleCost::Expensive;
13999   SDValue NegN0 =
14000       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14001   SDValue NegN1 =
14002       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14003   if (NegN0 && NegN1 &&
14004       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14005        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14006     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14007
14008   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14009   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14010   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14011       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14012       TLI.isOperationLegal(ISD::FABS, VT)) {
14013     SDValue Select = N0, X = N1;
14014     if (Select.getOpcode() != ISD::SELECT)
14015       std::swap(Select, X);
14016
14017     SDValue Cond = Select.getOperand(0);
14018     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14019     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14020
14021     if (TrueOpnd && FalseOpnd &&
14022         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14023         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14024         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14025       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14026       switch (CC) {
14027       default: break;
14028       case ISD::SETOLT:
14029       case ISD::SETULT:
14030       case ISD::SETOLE:
14031       case ISD::SETULE:
14032       case ISD::SETLT:
14033       case ISD::SETLE:
14034         std::swap(TrueOpnd, FalseOpnd);
14035         LLVM_FALLTHROUGH;
14036       case ISD::SETOGT:
14037       case ISD::SETUGT:
14038       case ISD::SETOGE:
14039       case ISD::SETUGE:
14040       case ISD::SETGT:
14041       case ISD::SETGE:
14042         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14043             TLI.isOperationLegal(ISD::FNEG, VT))
14044           return DAG.getNode(ISD::FNEG, DL, VT,
14045                    DAG.getNode(ISD::FABS, DL, VT, X));
14046         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14047           return DAG.getNode(ISD::FABS, DL, VT, X);
14048
14049         break;
14050       }
14051     }
14052   }
14053
14054   // FMUL -> FMA combines:
14055   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14056     AddToWorklist(Fused.getNode());
14057     return Fused;
14058   }
14059
14060   return SDValue();
14061 }
14062
14063 SDValue DAGCombiner::visitFMA(SDNode *N) {
14064   SDValue N0 = N->getOperand(0);
14065   SDValue N1 = N->getOperand(1);
14066   SDValue N2 = N->getOperand(2);
14067   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14068   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14069   EVT VT = N->getValueType(0);
14070   SDLoc DL(N);
14071   const TargetOptions &Options = DAG.getTarget().Options;
14072   // FMA nodes have flags that propagate to the created nodes.
14073   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14074
14075   bool UnsafeFPMath =
14076       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14077
14078   // Constant fold FMA.
14079   if (isa<ConstantFPSDNode>(N0) &&
14080       isa<ConstantFPSDNode>(N1) &&
14081       isa<ConstantFPSDNode>(N2)) {
14082     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14083   }
14084
14085   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14086   TargetLowering::NegatibleCost CostN0 =
14087       TargetLowering::NegatibleCost::Expensive;
14088   TargetLowering::NegatibleCost CostN1 =
14089       TargetLowering::NegatibleCost::Expensive;
14090   SDValue NegN0 =
14091       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14092   SDValue NegN1 =
14093       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14094   if (NegN0 && NegN1 &&
14095       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14096        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14097     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14098
14099   if (UnsafeFPMath) {
14100     if (N0CFP && N0CFP->isZero())
14101       return N2;
14102     if (N1CFP && N1CFP->isZero())
14103       return N2;
14104   }
14105
14106   if (N0CFP && N0CFP->isExactlyValue(1.0))
14107     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14108   if (N1CFP && N1CFP->isExactlyValue(1.0))
14109     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14110
14111   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14112   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14113      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14114     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14115
14116   if (UnsafeFPMath) {
14117     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14118     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14119         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14120         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14121       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14122                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14123     }
14124
14125     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14126     if (N0.getOpcode() == ISD::FMUL &&
14127         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14128         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14129       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14130                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14131                          N2);
14132     }
14133   }
14134
14135   // (fma x, -1, y) -> (fadd (fneg x), y)
14136   if (N1CFP) {
14137     if (N1CFP->isExactlyValue(1.0))
14138       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14139
14140     if (N1CFP->isExactlyValue(-1.0) &&
14141         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14142       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14143       AddToWorklist(RHSNeg.getNode());
14144       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14145     }
14146
14147     // fma (fneg x), K, y -> fma x -K, y
14148     if (N0.getOpcode() == ISD::FNEG &&
14149         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14150          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14151                                               ForCodeSize)))) {
14152       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14153                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14154     }
14155   }
14156
14157   if (UnsafeFPMath) {
14158     // (fma x, c, x) -> (fmul x, (c+1))
14159     if (N1CFP && N0 == N2) {
14160       return DAG.getNode(
14161           ISD::FMUL, DL, VT, N0,
14162           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14163     }
14164
14165     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14166     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14167       return DAG.getNode(
14168           ISD::FMUL, DL, VT, N0,
14169           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14170     }
14171   }
14172
14173   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14174   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14175   if (!TLI.isFNegFree(VT))
14176     if (SDValue Neg = TLI.getCheaperNegatedExpression(
14177             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14178       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14179   return SDValue();
14180 }
14181
14182 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14183 // reciprocal.
14184 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14185 // Notice that this is not always beneficial. One reason is different targets
14186 // may have different costs for FDIV and FMUL, so sometimes the cost of two
14187 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
14188 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
14189 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14190   // TODO: Limit this transform based on optsize/minsize - it always creates at
14191   //       least 1 extra instruction. But the perf win may be substantial enough
14192   //       that only minsize should restrict this.
14193   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14194   const SDNodeFlags Flags = N->getFlags();
14195   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14196     return SDValue();
14197
14198   // Skip if current node is a reciprocal/fneg-reciprocal.
14199   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14200   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14201   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14202     return SDValue();
14203
14204   // Exit early if the target does not want this transform or if there can't
14205   // possibly be enough uses of the divisor to make the transform worthwhile.
14206   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14207
14208   // For splat vectors, scale the number of uses by the splat factor. If we can
14209   // convert the division into a scalar op, that will likely be much faster.
14210   unsigned NumElts = 1;
14211   EVT VT = N->getValueType(0);
14212   if (VT.isVector() && DAG.isSplatValue(N1))
14213     NumElts = VT.getVectorNumElements();
14214
14215   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14216     return SDValue();
14217
14218   // Find all FDIV users of the same divisor.
14219   // Use a set because duplicates may be present in the user list.
14220   SetVector<SDNode *> Users;
14221   for (auto *U : N1->uses()) {
14222     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14223       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14224       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14225           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14226           U->getFlags().hasAllowReassociation() &&
14227           U->getFlags().hasNoSignedZeros())
14228         continue;
14229
14230       // This division is eligible for optimization only if global unsafe math
14231       // is enabled or if this division allows reciprocal formation.
14232       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14233         Users.insert(U);
14234     }
14235   }
14236
14237   // Now that we have the actual number of divisor uses, make sure it meets
14238   // the minimum threshold specified by the target.
14239   if ((Users.size() * NumElts) < MinUses)
14240     return SDValue();
14241
14242   SDLoc DL(N);
14243   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14244   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14245
14246   // Dividend / Divisor -> Dividend * Reciprocal
14247   for (auto *U : Users) {
14248     SDValue Dividend = U->getOperand(0);
14249     if (Dividend != FPOne) {
14250       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14251                                     Reciprocal, Flags);
14252       CombineTo(U, NewNode);
14253     } else if (U != Reciprocal.getNode()) {
14254       // In the absence of fast-math-flags, this user node is always the
14255       // same node as Reciprocal, but with FMF they may be different nodes.
14256       CombineTo(U, Reciprocal);
14257     }
14258   }
14259   return SDValue(N, 0);  // N was replaced.
14260 }
14261
14262 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14263   SDValue N0 = N->getOperand(0);
14264   SDValue N1 = N->getOperand(1);
14265   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14266   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14267   EVT VT = N->getValueType(0);
14268   SDLoc DL(N);
14269   const TargetOptions &Options = DAG.getTarget().Options;
14270   SDNodeFlags Flags = N->getFlags();
14271   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14272
14273   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14274     return R;
14275
14276   // fold vector ops
14277   if (VT.isVector())
14278     if (SDValue FoldedVOp = SimplifyVBinOp(N))
14279       return FoldedVOp;
14280
14281   // fold (fdiv c1, c2) -> c1/c2
14282   if (N0CFP && N1CFP)
14283     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14284
14285   if (SDValue NewSel = foldBinOpIntoSelect(N))
14286     return NewSel;
14287
14288   if (SDValue V = combineRepeatedFPDivisors(N))
14289     return V;
14290
14291   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14292     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
14293     if (N1CFP) {
14294       // Compute the reciprocal 1.0 / c2.
14295       const APFloat &N1APF = N1CFP->getValueAPF();
14296       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14297       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14298       // Only do the transform if the reciprocal is a legal fp immediate that
14299       // isn't too nasty (eg NaN, denormal, ...).
14300       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14301           (!LegalOperations ||
14302            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14303            // backend)... we should handle this gracefully after Legalize.
14304            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14305            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14306            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14307         return DAG.getNode(ISD::FMUL, DL, VT, N0,
14308                            DAG.getConstantFP(Recip, DL, VT));
14309     }
14310
14311     // If this FDIV is part of a reciprocal square root, it may be folded
14312     // into a target-specific square root estimate instruction.
14313     if (N1.getOpcode() == ISD::FSQRT) {
14314       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14315         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14316     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14317                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14318       if (SDValue RV =
14319               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14320         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14321         AddToWorklist(RV.getNode());
14322         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14323       }
14324     } else if (N1.getOpcode() == ISD::FP_ROUND &&
14325                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14326       if (SDValue RV =
14327               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14328         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14329         AddToWorklist(RV.getNode());
14330         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14331       }
14332     } else if (N1.getOpcode() == ISD::FMUL) {
14333       // Look through an FMUL. Even though this won't remove the FDIV directly,
14334       // it's still worthwhile to get rid of the FSQRT if possible.
14335       SDValue Sqrt, Y;
14336       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14337         Sqrt = N1.getOperand(0);
14338         Y = N1.getOperand(1);
14339       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14340         Sqrt = N1.getOperand(1);
14341         Y = N1.getOperand(0);
14342       }
14343       if (Sqrt.getNode()) {
14344         // If the other multiply operand is known positive, pull it into the
14345         // sqrt. That will eliminate the division if we convert to an estimate.
14346         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14347             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14348           SDValue A;
14349           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14350             A = Y.getOperand(0);
14351           else if (Y == Sqrt.getOperand(0))
14352             A = Y;
14353           if (A) {
14354             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14355             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14356             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14357             SDValue AAZ =
14358                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14359             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14360               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14361
14362             // Estimate creation failed. Clean up speculatively created nodes.
14363             recursivelyDeleteUnusedNodes(AAZ.getNode());
14364           }
14365         }
14366
14367         // We found a FSQRT, so try to make this fold:
14368         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14369         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14370           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14371           AddToWorklist(Div.getNode());
14372           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14373         }
14374       }
14375     }
14376
14377     // Fold into a reciprocal estimate and multiply instead of a real divide.
14378     if (Options.NoInfsFPMath || Flags.hasNoInfs())
14379       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14380         return RV;
14381   }
14382
14383   // Fold X/Sqrt(X) -> Sqrt(X)
14384   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14385       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14386     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14387       return N1;
14388
14389   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14390   TargetLowering::NegatibleCost CostN0 =
14391       TargetLowering::NegatibleCost::Expensive;
14392   TargetLowering::NegatibleCost CostN1 =
14393       TargetLowering::NegatibleCost::Expensive;
14394   SDValue NegN0 =
14395       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14396   SDValue NegN1 =
14397       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14398   if (NegN0 && NegN1 &&
14399       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14400        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14401     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14402
14403   return SDValue();
14404 }
14405
14406 SDValue DAGCombiner::visitFREM(SDNode *N) {
14407   SDValue N0 = N->getOperand(0);
14408   SDValue N1 = N->getOperand(1);
14409   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14410   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14411   EVT VT = N->getValueType(0);
14412   SDNodeFlags Flags = N->getFlags();
14413   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14414
14415   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14416     return R;
14417
14418   // fold (frem c1, c2) -> fmod(c1,c2)
14419   if (N0CFP && N1CFP)
14420     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14421
14422   if (SDValue NewSel = foldBinOpIntoSelect(N))
14423     return NewSel;
14424
14425   return SDValue();
14426 }
14427
14428 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14429   SDNodeFlags Flags = N->getFlags();
14430   const TargetOptions &Options = DAG.getTarget().Options;
14431
14432   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14433   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14434   if (!Flags.hasApproximateFuncs() ||
14435       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14436     return SDValue();
14437
14438   SDValue N0 = N->getOperand(0);
14439   if (TLI.isFsqrtCheap(N0, DAG))
14440     return SDValue();
14441
14442   // FSQRT nodes have flags that propagate to the created nodes.
14443   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14444   //       transform the fdiv, we may produce a sub-optimal estimate sequence
14445   //       because the reciprocal calculation may not have to filter out a
14446   //       0.0 input.
14447   return buildSqrtEstimate(N0, Flags);
14448 }
14449
14450 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14451 /// copysign(x, fp_round(y)) -> copysign(x, y)
14452 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14453   SDValue N1 = N->getOperand(1);
14454   if ((N1.getOpcode() == ISD::FP_EXTEND ||
14455        N1.getOpcode() == ISD::FP_ROUND)) {
14456     EVT N1VT = N1->getValueType(0);
14457     EVT N1Op0VT = N1->getOperand(0).getValueType();
14458
14459     // Always fold no-op FP casts.
14460     if (N1VT == N1Op0VT)
14461       return true;
14462
14463     // Do not optimize out type conversion of f128 type yet.
14464     // For some targets like x86_64, configuration is changed to keep one f128
14465     // value in one SSE register, but instruction selection cannot handle
14466     // FCOPYSIGN on SSE registers yet.
14467     if (N1Op0VT == MVT::f128)
14468       return false;
14469
14470     // Avoid mismatched vector operand types, for better instruction selection.
14471     if (N1Op0VT.isVector())
14472       return false;
14473
14474     return true;
14475   }
14476   return false;
14477 }
14478
14479 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14480   SDValue N0 = N->getOperand(0);
14481   SDValue N1 = N->getOperand(1);
14482   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14483   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14484   EVT VT = N->getValueType(0);
14485
14486   if (N0CFP && N1CFP) // Constant fold
14487     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14488
14489   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14490     const APFloat &V = N1C->getValueAPF();
14491     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
14492     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14493     if (!V.isNegative()) {
14494       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14495         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14496     } else {
14497       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14498         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14499                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14500     }
14501   }
14502
14503   // copysign(fabs(x), y) -> copysign(x, y)
14504   // copysign(fneg(x), y) -> copysign(x, y)
14505   // copysign(copysign(x,z), y) -> copysign(x, y)
14506   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14507       N0.getOpcode() == ISD::FCOPYSIGN)
14508     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14509
14510   // copysign(x, abs(y)) -> abs(x)
14511   if (N1.getOpcode() == ISD::FABS)
14512     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14513
14514   // copysign(x, copysign(y,z)) -> copysign(x, z)
14515   if (N1.getOpcode() == ISD::FCOPYSIGN)
14516     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14517
14518   // copysign(x, fp_extend(y)) -> copysign(x, y)
14519   // copysign(x, fp_round(y)) -> copysign(x, y)
14520   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14521     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14522
14523   return SDValue();
14524 }
14525
14526 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14527   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14528   if (!ExponentC)
14529     return SDValue();
14530   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14531
14532   // Try to convert x ** (1/3) into cube root.
14533   // TODO: Handle the various flavors of long double.
14534   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14535   //       Some range near 1/3 should be fine.
14536   EVT VT = N->getValueType(0);
14537   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14538       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14539     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14540     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
14541     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
14542     // For regular numbers, rounding may cause the results to differ.
14543     // Therefore, we require { nsz ninf nnan afn } for this transform.
14544     // TODO: We could select out the special cases if we don't have nsz/ninf.
14545     SDNodeFlags Flags = N->getFlags();
14546     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14547         !Flags.hasApproximateFuncs())
14548       return SDValue();
14549
14550     // Do not create a cbrt() libcall if the target does not have it, and do not
14551     // turn a pow that has lowering support into a cbrt() libcall.
14552     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14553         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14554          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14555       return SDValue();
14556
14557     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14558   }
14559
14560   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14561   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14562   // TODO: This could be extended (using a target hook) to handle smaller
14563   // power-of-2 fractional exponents.
14564   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14565   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14566   if (ExponentIs025 || ExponentIs075) {
14567     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14568     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
14569     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14570     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
14571     // For regular numbers, rounding may cause the results to differ.
14572     // Therefore, we require { nsz ninf afn } for this transform.
14573     // TODO: We could select out the special cases if we don't have nsz/ninf.
14574     SDNodeFlags Flags = N->getFlags();
14575
14576     // We only need no signed zeros for the 0.25 case.
14577     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14578         !Flags.hasApproximateFuncs())
14579       return SDValue();
14580
14581     // Don't double the number of libcalls. We are trying to inline fast code.
14582     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14583       return SDValue();
14584
14585     // Assume that libcalls are the smallest code.
14586     // TODO: This restriction should probably be lifted for vectors.
14587     if (ForCodeSize)
14588       return SDValue();
14589
14590     // pow(X, 0.25) --> sqrt(sqrt(X))
14591     SDLoc DL(N);
14592     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14593     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14594     if (ExponentIs025)
14595       return SqrtSqrt;
14596     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14597     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14598   }
14599
14600   return SDValue();
14601 }
14602
14603 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14604                                const TargetLowering &TLI) {
14605   // This optimization is guarded by a function attribute because it may produce
14606   // unexpected results. Ie, programs may be relying on the platform-specific
14607   // undefined behavior when the float-to-int conversion overflows.
14608   const Function &F = DAG.getMachineFunction().getFunction();
14609   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14610   if (StrictOverflow.getValueAsString().equals("false"))
14611     return SDValue();
14612
14613   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14614   // replacing casts with a libcall. We also must be allowed to ignore -0.0
14615   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14616   // conversions would return +0.0.
14617   // FIXME: We should be able to use node-level FMF here.
14618   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14619   EVT VT = N->getValueType(0);
14620   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14621       !DAG.getTarget().Options.NoSignedZerosFPMath)
14622     return SDValue();
14623
14624   // fptosi/fptoui round towards zero, so converting from FP to integer and
14625   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
14626   SDValue N0 = N->getOperand(0);
14627   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14628       N0.getOperand(0).getValueType() == VT)
14629     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14630
14631   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14632       N0.getOperand(0).getValueType() == VT)
14633     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14634
14635   return SDValue();
14636 }
14637
14638 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14639   SDValue N0 = N->getOperand(0);
14640   EVT VT = N->getValueType(0);
14641   EVT OpVT = N0.getValueType();
14642
14643   // [us]itofp(undef) = 0, because the result value is bounded.
14644   if (N0.isUndef())
14645     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14646
14647   // fold (sint_to_fp c1) -> c1fp
14648   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14649       // ...but only if the target supports immediate floating-point values
14650       (!LegalOperations ||
14651        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14652     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14653
14654   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14655   // but UINT_TO_FP is legal on this target, try to convert.
14656   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14657       hasOperation(ISD::UINT_TO_FP, OpVT)) {
14658     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14659     if (DAG.SignBitIsZero(N0))
14660       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14661   }
14662
14663   // The next optimizations are desirable only if SELECT_CC can be lowered.
14664   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14665   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14666       !VT.isVector() &&
14667       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14668     SDLoc DL(N);
14669     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14670                          DAG.getConstantFP(0.0, DL, VT));
14671   }
14672
14673   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14674   //      (select (setcc x, y, cc), 1.0, 0.0)
14675   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14676       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14677       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14678     SDLoc DL(N);
14679     return DAG.getSelect(DL, VT, N0.getOperand(0),
14680                          DAG.getConstantFP(1.0, DL, VT),
14681                          DAG.getConstantFP(0.0, DL, VT));
14682   }
14683
14684   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14685     return FTrunc;
14686
14687   return SDValue();
14688 }
14689
14690 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14691   SDValue N0 = N->getOperand(0);
14692   EVT VT = N->getValueType(0);
14693   EVT OpVT = N0.getValueType();
14694
14695   // [us]itofp(undef) = 0, because the result value is bounded.
14696   if (N0.isUndef())
14697     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14698
14699   // fold (uint_to_fp c1) -> c1fp
14700   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14701       // ...but only if the target supports immediate floating-point values
14702       (!LegalOperations ||
14703        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14704     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14705
14706   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14707   // but SINT_TO_FP is legal on this target, try to convert.
14708   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14709       hasOperation(ISD::SINT_TO_FP, OpVT)) {
14710     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14711     if (DAG.SignBitIsZero(N0))
14712       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14713   }
14714
14715   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14716   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14717       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14718     SDLoc DL(N);
14719     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14720                          DAG.getConstantFP(0.0, DL, VT));
14721   }
14722
14723   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14724     return FTrunc;
14725
14726   return SDValue();
14727 }
14728
14729 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
14730 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14731   SDValue N0 = N->getOperand(0);
14732   EVT VT = N->getValueType(0);
14733
14734   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14735     return SDValue();
14736
14737   SDValue Src = N0.getOperand(0);
14738   EVT SrcVT = Src.getValueType();
14739   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14740   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14741
14742   // We can safely assume the conversion won't overflow the output range,
14743   // because (for example) (uint8_t)18293.f is undefined behavior.
14744
14745   // Since we can assume the conversion won't overflow, our decision as to
14746   // whether the input will fit in the float should depend on the minimum
14747   // of the input range and output range.
14748
14749   // This means this is also safe for a signed input and unsigned output, since
14750   // a negative input would lead to undefined behavior.
14751   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14752   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14753   unsigned ActualSize = std::min(InputSize, OutputSize);
14754   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14755
14756   // We can only fold away the float conversion if the input range can be
14757   // represented exactly in the float range.
14758   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14759     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14760       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14761                                                        : ISD::ZERO_EXTEND;
14762       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14763     }
14764     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14765       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14766     return DAG.getBitcast(VT, Src);
14767   }
14768   return SDValue();
14769 }
14770
14771 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14772   SDValue N0 = N->getOperand(0);
14773   EVT VT = N->getValueType(0);
14774
14775   // fold (fp_to_sint undef) -> undef
14776   if (N0.isUndef())
14777     return DAG.getUNDEF(VT);
14778
14779   // fold (fp_to_sint c1fp) -> c1
14780   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14781     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14782
14783   return FoldIntToFPToInt(N, DAG);
14784 }
14785
14786 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14787   SDValue N0 = N->getOperand(0);
14788   EVT VT = N->getValueType(0);
14789
14790   // fold (fp_to_uint undef) -> undef
14791   if (N0.isUndef())
14792     return DAG.getUNDEF(VT);
14793
14794   // fold (fp_to_uint c1fp) -> c1
14795   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14796     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14797
14798   return FoldIntToFPToInt(N, DAG);
14799 }
14800
14801 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14802   SDValue N0 = N->getOperand(0);
14803   SDValue N1 = N->getOperand(1);
14804   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14805   EVT VT = N->getValueType(0);
14806
14807   // fold (fp_round c1fp) -> c1fp
14808   if (N0CFP)
14809     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14810
14811   // fold (fp_round (fp_extend x)) -> x
14812   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14813     return N0.getOperand(0);
14814
14815   // fold (fp_round (fp_round x)) -> (fp_round x)
14816   if (N0.getOpcode() == ISD::FP_ROUND) {
14817     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14818     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14819
14820     // Skip this folding if it results in an fp_round from f80 to f16.
14821     //
14822     // f80 to f16 always generates an expensive (and as yet, unimplemented)
14823     // libcall to __truncxfhf2 instead of selecting native f16 conversion
14824     // instructions from f32 or f64.  Moreover, the first (value-preserving)
14825     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14826     // x86.
14827     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14828       return SDValue();
14829
14830     // If the first fp_round isn't a value preserving truncation, it might
14831     // introduce a tie in the second fp_round, that wouldn't occur in the
14832     // single-step fp_round we want to fold to.
14833     // In other words, double rounding isn't the same as rounding.
14834     // Also, this is a value preserving truncation iff both fp_round's are.
14835     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14836       SDLoc DL(N);
14837       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14838                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14839     }
14840   }
14841
14842   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14843   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14844     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14845                               N0.getOperand(0), N1);
14846     AddToWorklist(Tmp.getNode());
14847     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14848                        Tmp, N0.getOperand(1));
14849   }
14850
14851   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14852     return NewVSel;
14853
14854   return SDValue();
14855 }
14856
14857 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14858   SDValue N0 = N->getOperand(0);
14859   EVT VT = N->getValueType(0);
14860
14861   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
14862   if (N->hasOneUse() &&
14863       N->use_begin()->getOpcode() == ISD::FP_ROUND)
14864     return SDValue();
14865
14866   // fold (fp_extend c1fp) -> c1fp
14867   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14868     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14869
14870   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14871   if (N0.getOpcode() == ISD::FP16_TO_FP &&
14872       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14873     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14874
14875   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
14876   // value of X.
14877   if (N0.getOpcode() == ISD::FP_ROUND
14878       && N0.getConstantOperandVal(1) == 1) {
14879     SDValue In = N0.getOperand(0);
14880     if (In.getValueType() == VT) return In;
14881     if (VT.bitsLT(In.getValueType()))
14882       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14883                          In, N0.getOperand(1));
14884     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14885   }
14886
14887   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14888   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14889        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14890     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14891     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14892                                      LN0->getChain(),
14893                                      LN0->getBasePtr(), N0.getValueType(),
14894                                      LN0->getMemOperand());
14895     CombineTo(N, ExtLoad);
14896     CombineTo(N0.getNode(),
14897               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14898                           N0.getValueType(), ExtLoad,
14899                           DAG.getIntPtrConstant(1, SDLoc(N0))),
14900               ExtLoad.getValue(1));
14901     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14902   }
14903
14904   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14905     return NewVSel;
14906
14907   return SDValue();
14908 }
14909
14910 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14911   SDValue N0 = N->getOperand(0);
14912   EVT VT = N->getValueType(0);
14913
14914   // fold (fceil c1) -> fceil(c1)
14915   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14916     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14917
14918   return SDValue();
14919 }
14920
14921 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14922   SDValue N0 = N->getOperand(0);
14923   EVT VT = N->getValueType(0);
14924
14925   // fold (ftrunc c1) -> ftrunc(c1)
14926   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14927     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14928
14929   // fold ftrunc (known rounded int x) -> x
14930   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
14931   // likely to be generated to extract integer from a rounded floating value.
14932   switch (N0.getOpcode()) {
14933   default: break;
14934   case ISD::FRINT:
14935   case ISD::FTRUNC:
14936   case ISD::FNEARBYINT:
14937   case ISD::FFLOOR:
14938   case ISD::FCEIL:
14939     return N0;
14940   }
14941
14942   return SDValue();
14943 }
14944
14945 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14946   SDValue N0 = N->getOperand(0);
14947   EVT VT = N->getValueType(0);
14948
14949   // fold (ffloor c1) -> ffloor(c1)
14950   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14951     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14952
14953   return SDValue();
14954 }
14955
14956 SDValue DAGCombiner::visitFNEG(SDNode *N) {
14957   SDValue N0 = N->getOperand(0);
14958   EVT VT = N->getValueType(0);
14959   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14960
14961   // Constant fold FNEG.
14962   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14963     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14964
14965   if (SDValue NegN0 =
14966           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14967     return NegN0;
14968
14969   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14970   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14971   // know it was called from a context with a nsz flag if the input fsub does
14972   // not.
14973   if (N0.getOpcode() == ISD::FSUB &&
14974       (DAG.getTarget().Options.NoSignedZerosFPMath ||
14975        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14976     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14977                        N0.getOperand(0));
14978   }
14979
14980   if (SDValue Cast = foldSignChangeInBitcast(N))
14981     return Cast;
14982
14983   return SDValue();
14984 }
14985
14986 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14987                             APFloat (*Op)(const APFloat &, const APFloat &)) {
14988   SDValue N0 = N->getOperand(0);
14989   SDValue N1 = N->getOperand(1);
14990   EVT VT = N->getValueType(0);
14991   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14992   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14993   const SDNodeFlags Flags = N->getFlags();
14994   unsigned Opc = N->getOpcode();
14995   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14996   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14997   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14998
14999   if (N0CFP && N1CFP) {
15000     const APFloat &C0 = N0CFP->getValueAPF();
15001     const APFloat &C1 = N1CFP->getValueAPF();
15002     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
15003   }
15004
15005   // Canonicalize to constant on RHS.
15006   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15007       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15008     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15009
15010   if (N1CFP) {
15011     const APFloat &AF = N1CFP->getValueAPF();
15012
15013     // minnum(X, nan) -> X
15014     // maxnum(X, nan) -> X
15015     // minimum(X, nan) -> nan
15016     // maximum(X, nan) -> nan
15017     if (AF.isNaN())
15018       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15019
15020     // In the following folds, inf can be replaced with the largest finite
15021     // float, if the ninf flag is set.
15022     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15023       // minnum(X, -inf) -> -inf
15024       // maxnum(X, +inf) -> +inf
15025       // minimum(X, -inf) -> -inf if nnan
15026       // maximum(X, +inf) -> +inf if nnan
15027       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15028         return N->getOperand(1);
15029
15030       // minnum(X, +inf) -> X if nnan
15031       // maxnum(X, -inf) -> X if nnan
15032       // minimum(X, +inf) -> X
15033       // maximum(X, -inf) -> X
15034       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15035         return N->getOperand(0);
15036     }
15037   }
15038
15039   return SDValue();
15040 }
15041
15042 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
15043   return visitFMinMax(DAG, N, minnum);
15044 }
15045
15046 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
15047   return visitFMinMax(DAG, N, maxnum);
15048 }
15049
15050 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
15051   return visitFMinMax(DAG, N, minimum);
15052 }
15053
15054 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
15055   return visitFMinMax(DAG, N, maximum);
15056 }
15057
15058 SDValue DAGCombiner::visitFABS(SDNode *N) {
15059   SDValue N0 = N->getOperand(0);
15060   EVT VT = N->getValueType(0);
15061
15062   // fold (fabs c1) -> fabs(c1)
15063   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15064     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15065
15066   // fold (fabs (fabs x)) -> (fabs x)
15067   if (N0.getOpcode() == ISD::FABS)
15068     return N->getOperand(0);
15069
15070   // fold (fabs (fneg x)) -> (fabs x)
15071   // fold (fabs (fcopysign x, y)) -> (fabs x)
15072   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15073     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15074
15075   if (SDValue Cast = foldSignChangeInBitcast(N))
15076     return Cast;
15077
15078   return SDValue();
15079 }
15080
15081 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15082   SDValue Chain = N->getOperand(0);
15083   SDValue N1 = N->getOperand(1);
15084   SDValue N2 = N->getOperand(2);
15085
15086   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15087   // nondeterministic jumps).
15088   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15089     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15090                        N1->getOperand(0), N2);
15091   }
15092
15093   // If N is a constant we could fold this into a fallthrough or unconditional
15094   // branch. However that doesn't happen very often in normal code, because
15095   // Instcombine/SimplifyCFG should have handled the available opportunities.
15096   // If we did this folding here, it would be necessary to update the
15097   // MachineBasicBlock CFG, which is awkward.
15098
15099   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15100   // on the target.
15101   if (N1.getOpcode() == ISD::SETCC &&
15102       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15103                                    N1.getOperand(0).getValueType())) {
15104     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15105                        Chain, N1.getOperand(2),
15106                        N1.getOperand(0), N1.getOperand(1), N2);
15107   }
15108
15109   if (N1.hasOneUse()) {
15110     // rebuildSetCC calls visitXor which may change the Chain when there is a
15111     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15112     HandleSDNode ChainHandle(Chain);
15113     if (SDValue NewN1 = rebuildSetCC(N1))
15114       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15115                          ChainHandle.getValue(), NewN1, N2);
15116   }
15117
15118   return SDValue();
15119 }
15120
15121 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15122   if (N.getOpcode() == ISD::SRL ||
15123       (N.getOpcode() == ISD::TRUNCATE &&
15124        (N.getOperand(0).hasOneUse() &&
15125         N.getOperand(0).getOpcode() == ISD::SRL))) {
15126     // Look pass the truncate.
15127     if (N.getOpcode() == ISD::TRUNCATE)
15128       N = N.getOperand(0);
15129
15130     // Match this pattern so that we can generate simpler code:
15131     //
15132     //   %a = ...
15133     //   %b = and i32 %a, 2
15134     //   %c = srl i32 %b, 1
15135     //   brcond i32 %c ...
15136     //
15137     // into
15138     //
15139     //   %a = ...
15140     //   %b = and i32 %a, 2
15141     //   %c = setcc eq %b, 0
15142     //   brcond %c ...
15143     //
15144     // This applies only when the AND constant value has one bit set and the
15145     // SRL constant is equal to the log2 of the AND constant. The back-end is
15146     // smart enough to convert the result into a TEST/JMP sequence.
15147     SDValue Op0 = N.getOperand(0);
15148     SDValue Op1 = N.getOperand(1);
15149
15150     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15151       SDValue AndOp1 = Op0.getOperand(1);
15152
15153       if (AndOp1.getOpcode() == ISD::Constant) {
15154         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15155
15156         if (AndConst.isPowerOf2() &&
15157             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15158           SDLoc DL(N);
15159           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15160                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15161                               ISD::SETNE);
15162         }
15163       }
15164     }
15165   }
15166
15167   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15168   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15169   if (N.getOpcode() == ISD::XOR) {
15170     // Because we may call this on a speculatively constructed
15171     // SimplifiedSetCC Node, we need to simplify this node first.
15172     // Ideally this should be folded into SimplifySetCC and not
15173     // here. For now, grab a handle to N so we don't lose it from
15174     // replacements interal to the visit.
15175     HandleSDNode XORHandle(N);
15176     while (N.getOpcode() == ISD::XOR) {
15177       SDValue Tmp = visitXOR(N.getNode());
15178       // No simplification done.
15179       if (!Tmp.getNode())
15180         break;
15181       // Returning N is form in-visit replacement that may invalidated
15182       // N. Grab value from Handle.
15183       if (Tmp.getNode() == N.getNode())
15184         N = XORHandle.getValue();
15185       else // Node simplified. Try simplifying again.
15186         N = Tmp;
15187     }
15188
15189     if (N.getOpcode() != ISD::XOR)
15190       return N;
15191
15192     SDValue Op0 = N->getOperand(0);
15193     SDValue Op1 = N->getOperand(1);
15194
15195     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15196       bool Equal = false;
15197       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15198       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15199           Op0.getValueType() == MVT::i1) {
15200         N = Op0;
15201         Op0 = N->getOperand(0);
15202         Op1 = N->getOperand(1);
15203         Equal = true;
15204       }
15205
15206       EVT SetCCVT = N.getValueType();
15207       if (LegalTypes)
15208         SetCCVT = getSetCCResultType(SetCCVT);
15209       // Replace the uses of XOR with SETCC
15210       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15211                           Equal ? ISD::SETEQ : ISD::SETNE);
15212     }
15213   }
15214
15215   return SDValue();
15216 }
15217
15218 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15219 //
15220 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15221   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15222   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15223
15224   // If N is a constant we could fold this into a fallthrough or unconditional
15225   // branch. However that doesn't happen very often in normal code, because
15226   // Instcombine/SimplifyCFG should have handled the available opportunities.
15227   // If we did this folding here, it would be necessary to update the
15228   // MachineBasicBlock CFG, which is awkward.
15229
15230   // Use SimplifySetCC to simplify SETCC's.
15231   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15232                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15233                                false);
15234   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15235
15236   // fold to a simpler setcc
15237   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15238     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15239                        N->getOperand(0), Simp.getOperand(2),
15240                        Simp.getOperand(0), Simp.getOperand(1),
15241                        N->getOperand(4));
15242
15243   return SDValue();
15244 }
15245
15246 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15247                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15248                                      const TargetLowering &TLI) {
15249   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15250     if (LD->isIndexed())
15251       return false;
15252     EVT VT = LD->getMemoryVT();
15253     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15254       return false;
15255     Ptr = LD->getBasePtr();
15256   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15257     if (ST->isIndexed())
15258       return false;
15259     EVT VT = ST->getMemoryVT();
15260     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15261       return false;
15262     Ptr = ST->getBasePtr();
15263     IsLoad = false;
15264   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15265     if (LD->isIndexed())
15266       return false;
15267     EVT VT = LD->getMemoryVT();
15268     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15269         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15270       return false;
15271     Ptr = LD->getBasePtr();
15272     IsMasked = true;
15273   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15274     if (ST->isIndexed())
15275       return false;
15276     EVT VT = ST->getMemoryVT();
15277     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15278         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15279       return false;
15280     Ptr = ST->getBasePtr();
15281     IsLoad = false;
15282     IsMasked = true;
15283   } else {
15284     return false;
15285   }
15286   return true;
15287 }
15288
15289 /// Try turning a load/store into a pre-indexed load/store when the base
15290 /// pointer is an add or subtract and it has other uses besides the load/store.
15291 /// After the transformation, the new indexed load/store has effectively folded
15292 /// the add/subtract in and all of its other uses are redirected to the
15293 /// new load/store.
15294 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15295   if (Level < AfterLegalizeDAG)
15296     return false;
15297
15298   bool IsLoad = true;
15299   bool IsMasked = false;
15300   SDValue Ptr;
15301   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15302                                 Ptr, TLI))
15303     return false;
15304
15305   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15306   // out.  There is no reason to make this a preinc/predec.
15307   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15308       Ptr.getNode()->hasOneUse())
15309     return false;
15310
15311   // Ask the target to do addressing mode selection.
15312   SDValue BasePtr;
15313   SDValue Offset;
15314   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15315   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15316     return false;
15317
15318   // Backends without true r+i pre-indexed forms may need to pass a
15319   // constant base with a variable offset so that constant coercion
15320   // will work with the patterns in canonical form.
15321   bool Swapped = false;
15322   if (isa<ConstantSDNode>(BasePtr)) {
15323     std::swap(BasePtr, Offset);
15324     Swapped = true;
15325   }
15326
15327   // Don't create a indexed load / store with zero offset.
15328   if (isNullConstant(Offset))
15329     return false;
15330
15331   // Try turning it into a pre-indexed load / store except when:
15332   // 1) The new base ptr is a frame index.
15333   // 2) If N is a store and the new base ptr is either the same as or is a
15334   //    predecessor of the value being stored.
15335   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15336   //    that would create a cycle.
15337   // 4) All uses are load / store ops that use it as old base ptr.
15338
15339   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15340   // (plus the implicit offset) to a register to preinc anyway.
15341   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15342     return false;
15343
15344   // Check #2.
15345   if (!IsLoad) {
15346     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15347                            : cast<StoreSDNode>(N)->getValue();
15348
15349     // Would require a copy.
15350     if (Val == BasePtr)
15351       return false;
15352
15353     // Would create a cycle.
15354     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15355       return false;
15356   }
15357
15358   // Caches for hasPredecessorHelper.
15359   SmallPtrSet<const SDNode *, 32> Visited;
15360   SmallVector<const SDNode *, 16> Worklist;
15361   Worklist.push_back(N);
15362
15363   // If the offset is a constant, there may be other adds of constants that
15364   // can be folded with this one. We should do this to avoid having to keep
15365   // a copy of the original base pointer.
15366   SmallVector<SDNode *, 16> OtherUses;
15367   if (isa<ConstantSDNode>(Offset))
15368     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15369                               UE = BasePtr.getNode()->use_end();
15370          UI != UE; ++UI) {
15371       SDUse &Use = UI.getUse();
15372       // Skip the use that is Ptr and uses of other results from BasePtr's
15373       // node (important for nodes that return multiple results).
15374       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15375         continue;
15376
15377       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15378         continue;
15379
15380       if (Use.getUser()->getOpcode() != ISD::ADD &&
15381           Use.getUser()->getOpcode() != ISD::SUB) {
15382         OtherUses.clear();
15383         break;
15384       }
15385
15386       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15387       if (!isa<ConstantSDNode>(Op1)) {
15388         OtherUses.clear();
15389         break;
15390       }
15391
15392       // FIXME: In some cases, we can be smarter about this.
15393       if (Op1.getValueType() != Offset.getValueType()) {
15394         OtherUses.clear();
15395         break;
15396       }
15397
15398       OtherUses.push_back(Use.getUser());
15399     }
15400
15401   if (Swapped)
15402     std::swap(BasePtr, Offset);
15403
15404   // Now check for #3 and #4.
15405   bool RealUse = false;
15406
15407   for (SDNode *Use : Ptr.getNode()->uses()) {
15408     if (Use == N)
15409       continue;
15410     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15411       return false;
15412
15413     // If Ptr may be folded in addressing mode of other use, then it's
15414     // not profitable to do this transformation.
15415     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15416       RealUse = true;
15417   }
15418
15419   if (!RealUse)
15420     return false;
15421
15422   SDValue Result;
15423   if (!IsMasked) {
15424     if (IsLoad)
15425       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15426     else
15427       Result =
15428           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15429   } else {
15430     if (IsLoad)
15431       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15432                                         Offset, AM);
15433     else
15434       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15435                                          Offset, AM);
15436   }
15437   ++PreIndexedNodes;
15438   ++NodesCombined;
15439   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15440              Result.getNode()->dump(&DAG); dbgs() << '\n');
15441   WorklistRemover DeadNodes(*this);
15442   if (IsLoad) {
15443     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15444     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15445   } else {
15446     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15447   }
15448
15449   // Finally, since the node is now dead, remove it from the graph.
15450   deleteAndRecombine(N);
15451
15452   if (Swapped)
15453     std::swap(BasePtr, Offset);
15454
15455   // Replace other uses of BasePtr that can be updated to use Ptr
15456   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15457     unsigned OffsetIdx = 1;
15458     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15459       OffsetIdx = 0;
15460     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15461            BasePtr.getNode() && "Expected BasePtr operand");
15462
15463     // We need to replace ptr0 in the following expression:
15464     //   x0 * offset0 + y0 * ptr0 = t0
15465     // knowing that
15466     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15467     //
15468     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15469     // indexed load/store and the expression that needs to be re-written.
15470     //
15471     // Therefore, we have:
15472     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
15473
15474     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15475     const APInt &Offset0 = CN->getAPIntValue();
15476     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15477     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15478     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15479     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15480     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15481
15482     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15483
15484     APInt CNV = Offset0;
15485     if (X0 < 0) CNV = -CNV;
15486     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15487     else CNV = CNV - Offset1;
15488
15489     SDLoc DL(OtherUses[i]);
15490
15491     // We can now generate the new expression.
15492     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15493     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15494
15495     SDValue NewUse = DAG.getNode(Opcode,
15496                                  DL,
15497                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15498     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15499     deleteAndRecombine(OtherUses[i]);
15500   }
15501
15502   // Replace the uses of Ptr with uses of the updated base value.
15503   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15504   deleteAndRecombine(Ptr.getNode());
15505   AddToWorklist(Result.getNode());
15506
15507   return true;
15508 }
15509
15510 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15511                                    SDValue &BasePtr, SDValue &Offset,
15512                                    ISD::MemIndexedMode &AM,
15513                                    SelectionDAG &DAG,
15514                                    const TargetLowering &TLI) {
15515   if (PtrUse == N ||
15516       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15517     return false;
15518
15519   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15520     return false;
15521
15522   // Don't create a indexed load / store with zero offset.
15523   if (isNullConstant(Offset))
15524     return false;
15525
15526   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15527     return false;
15528
15529   SmallPtrSet<const SDNode *, 32> Visited;
15530   for (SDNode *Use : BasePtr.getNode()->uses()) {
15531     if (Use == Ptr.getNode())
15532       continue;
15533
15534     // No if there's a later user which could perform the index instead.
15535     if (isa<MemSDNode>(Use)) {
15536       bool IsLoad = true;
15537       bool IsMasked = false;
15538       SDValue OtherPtr;
15539       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15540                                    IsMasked, OtherPtr, TLI)) {
15541         SmallVector<const SDNode *, 2> Worklist;
15542         Worklist.push_back(Use);
15543         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15544           return false;
15545       }
15546     }
15547
15548     // If all the uses are load / store addresses, then don't do the
15549     // transformation.
15550     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15551       for (SDNode *UseUse : Use->uses())
15552         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15553           return false;
15554     }
15555   }
15556   return true;
15557 }
15558
15559 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15560                                          bool &IsMasked, SDValue &Ptr,
15561                                          SDValue &BasePtr, SDValue &Offset,
15562                                          ISD::MemIndexedMode &AM,
15563                                          SelectionDAG &DAG,
15564                                          const TargetLowering &TLI) {
15565   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15566                                 IsMasked, Ptr, TLI) ||
15567       Ptr.getNode()->hasOneUse())
15568     return nullptr;
15569
15570   // Try turning it into a post-indexed load / store except when
15571   // 1) All uses are load / store ops that use it as base ptr (and
15572   //    it may be folded as addressing mmode).
15573   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15574   //    nor a successor of N. Otherwise, if Op is folded that would
15575   //    create a cycle.
15576   for (SDNode *Op : Ptr->uses()) {
15577     // Check for #1.
15578     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15579       continue;
15580
15581     // Check for #2.
15582     SmallPtrSet<const SDNode *, 32> Visited;
15583     SmallVector<const SDNode *, 8> Worklist;
15584     // Ptr is predecessor to both N and Op.
15585     Visited.insert(Ptr.getNode());
15586     Worklist.push_back(N);
15587     Worklist.push_back(Op);
15588     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15589         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15590       return Op;
15591   }
15592   return nullptr;
15593 }
15594
15595 /// Try to combine a load/store with a add/sub of the base pointer node into a
15596 /// post-indexed load/store. The transformation folded the add/subtract into the
15597 /// new indexed load/store effectively and all of its uses are redirected to the
15598 /// new load/store.
15599 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15600   if (Level < AfterLegalizeDAG)
15601     return false;
15602
15603   bool IsLoad = true;
15604   bool IsMasked = false;
15605   SDValue Ptr;
15606   SDValue BasePtr;
15607   SDValue Offset;
15608   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15609   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15610                                          Offset, AM, DAG, TLI);
15611   if (!Op)
15612     return false;
15613
15614   SDValue Result;
15615   if (!IsMasked)
15616     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15617                                          Offset, AM)
15618                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15619                                           BasePtr, Offset, AM);
15620   else
15621     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15622                                                BasePtr, Offset, AM)
15623                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15624                                                 BasePtr, Offset, AM);
15625   ++PostIndexedNodes;
15626   ++NodesCombined;
15627   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15628              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15629              dbgs() << '\n');
15630   WorklistRemover DeadNodes(*this);
15631   if (IsLoad) {
15632     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15633     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15634   } else {
15635     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15636   }
15637
15638   // Finally, since the node is now dead, remove it from the graph.
15639   deleteAndRecombine(N);
15640
15641   // Replace the uses of Use with uses of the updated base value.
15642   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15643                                 Result.getValue(IsLoad ? 1 : 0));
15644   deleteAndRecombine(Op);
15645   return true;
15646 }
15647
15648 /// Return the base-pointer arithmetic from an indexed \p LD.
15649 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15650   ISD::MemIndexedMode AM = LD->getAddressingMode();
15651   assert(AM != ISD::UNINDEXED);
15652   SDValue BP = LD->getOperand(1);
15653   SDValue Inc = LD->getOperand(2);
15654
15655   // Some backends use TargetConstants for load offsets, but don't expect
15656   // TargetConstants in general ADD nodes. We can convert these constants into
15657   // regular Constants (if the constant is not opaque).
15658   assert((Inc.getOpcode() != ISD::TargetConstant ||
15659           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15660          "Cannot split out indexing using opaque target constants");
15661   if (Inc.getOpcode() == ISD::TargetConstant) {
15662     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15663     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15664                           ConstInc->getValueType(0));
15665   }
15666
15667   unsigned Opc =
15668       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15669   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15670 }
15671
15672 static inline ElementCount numVectorEltsOrZero(EVT T) {
15673   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15674 }
15675
15676 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15677   Val = ST->getValue();
15678   EVT STType = Val.getValueType();
15679   EVT STMemType = ST->getMemoryVT();
15680   if (STType == STMemType)
15681     return true;
15682   if (isTypeLegal(STMemType))
15683     return false; // fail.
15684   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15685       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15686     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15687     return true;
15688   }
15689   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15690       STType.isInteger() && STMemType.isInteger()) {
15691     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15692     return true;
15693   }
15694   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15695     Val = DAG.getBitcast(STMemType, Val);
15696     return true;
15697   }
15698   return false; // fail.
15699 }
15700
15701 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15702   EVT LDMemType = LD->getMemoryVT();
15703   EVT LDType = LD->getValueType(0);
15704   assert(Val.getValueType() == LDMemType &&
15705          "Attempting to extend value of non-matching type");
15706   if (LDType == LDMemType)
15707     return true;
15708   if (LDMemType.isInteger() && LDType.isInteger()) {
15709     switch (LD->getExtensionType()) {
15710     case ISD::NON_EXTLOAD:
15711       Val = DAG.getBitcast(LDType, Val);
15712       return true;
15713     case ISD::EXTLOAD:
15714       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15715       return true;
15716     case ISD::SEXTLOAD:
15717       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15718       return true;
15719     case ISD::ZEXTLOAD:
15720       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15721       return true;
15722     }
15723   }
15724   return false;
15725 }
15726
15727 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15728   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15729     return SDValue();
15730   SDValue Chain = LD->getOperand(0);
15731   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15732   // TODO: Relax this restriction for unordered atomics (see D66309)
15733   if (!ST || !ST->isSimple())
15734     return SDValue();
15735
15736   EVT LDType = LD->getValueType(0);
15737   EVT LDMemType = LD->getMemoryVT();
15738   EVT STMemType = ST->getMemoryVT();
15739   EVT STType = ST->getValue().getValueType();
15740
15741   // There are two cases to consider here:
15742   //  1. The store is fixed width and the load is scalable. In this case we
15743   //     don't know at compile time if the store completely envelops the load
15744   //     so we abandon the optimisation.
15745   //  2. The store is scalable and the load is fixed width. We could
15746   //     potentially support a limited number of cases here, but there has been
15747   //     no cost-benefit analysis to prove it's worth it.
15748   bool LdStScalable = LDMemType.isScalableVector();
15749   if (LdStScalable != STMemType.isScalableVector())
15750     return SDValue();
15751
15752   // If we are dealing with scalable vectors on a big endian platform the
15753   // calculation of offsets below becomes trickier, since we do not know at
15754   // compile time the absolute size of the vector. Until we've done more
15755   // analysis on big-endian platforms it seems better to bail out for now.
15756   if (LdStScalable && DAG.getDataLayout().isBigEndian())
15757     return SDValue();
15758
15759   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15760   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15761   int64_t Offset;
15762   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15763     return SDValue();
15764
15765   // Normalize for Endianness. After this Offset=0 will denote that the least
15766   // significant bit in the loaded value maps to the least significant bit in
15767   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
15768   // n:th least significant byte of the stored value.
15769   if (DAG.getDataLayout().isBigEndian())
15770     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15771               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15772                  8 -
15773              Offset;
15774
15775   // Check that the stored value cover all bits that are loaded.
15776   bool STCoversLD;
15777
15778   TypeSize LdMemSize = LDMemType.getSizeInBits();
15779   TypeSize StMemSize = STMemType.getSizeInBits();
15780   if (LdStScalable)
15781     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15782   else
15783     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15784                                    StMemSize.getFixedSize());
15785
15786   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15787     if (LD->isIndexed()) {
15788       // Cannot handle opaque target constants and we must respect the user's
15789       // request not to split indexes from loads.
15790       if (!canSplitIdx(LD))
15791         return SDValue();
15792       SDValue Idx = SplitIndexingFromLoad(LD);
15793       SDValue Ops[] = {Val, Idx, Chain};
15794       return CombineTo(LD, Ops, 3);
15795     }
15796     return CombineTo(LD, Val, Chain);
15797   };
15798
15799   if (!STCoversLD)
15800     return SDValue();
15801
15802   // Memory as copy space (potentially masked).
15803   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15804     // Simple case: Direct non-truncating forwarding
15805     if (LDType.getSizeInBits() == LdMemSize)
15806       return ReplaceLd(LD, ST->getValue(), Chain);
15807     // Can we model the truncate and extension with an and mask?
15808     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15809         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15810       // Mask to size of LDMemType
15811       auto Mask =
15812           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15813                                                StMemSize.getFixedSize()),
15814                           SDLoc(ST), STType);
15815       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15816       return ReplaceLd(LD, Val, Chain);
15817     }
15818   }
15819
15820   // TODO: Deal with nonzero offset.
15821   if (LD->getBasePtr().isUndef() || Offset != 0)
15822     return SDValue();
15823   // Model necessary truncations / extenstions.
15824   SDValue Val;
15825   // Truncate Value To Stored Memory Size.
15826   do {
15827     if (!getTruncatedStoreValue(ST, Val))
15828       continue;
15829     if (!isTypeLegal(LDMemType))
15830       continue;
15831     if (STMemType != LDMemType) {
15832       // TODO: Support vectors? This requires extract_subvector/bitcast.
15833       if (!STMemType.isVector() && !LDMemType.isVector() &&
15834           STMemType.isInteger() && LDMemType.isInteger())
15835         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15836       else
15837         continue;
15838     }
15839     if (!extendLoadedValueToExtension(LD, Val))
15840       continue;
15841     return ReplaceLd(LD, Val, Chain);
15842   } while (false);
15843
15844   // On failure, cleanup dead nodes we may have created.
15845   if (Val->use_empty())
15846     deleteAndRecombine(Val.getNode());
15847   return SDValue();
15848 }
15849
15850 SDValue DAGCombiner::visitLOAD(SDNode *N) {
15851   LoadSDNode *LD  = cast<LoadSDNode>(N);
15852   SDValue Chain = LD->getChain();
15853   SDValue Ptr   = LD->getBasePtr();
15854
15855   // If load is not volatile and there are no uses of the loaded value (and
15856   // the updated indexed value in case of indexed loads), change uses of the
15857   // chain value into uses of the chain input (i.e. delete the dead load).
15858   // TODO: Allow this for unordered atomics (see D66309)
15859   if (LD->isSimple()) {
15860     if (N->getValueType(1) == MVT::Other) {
15861       // Unindexed loads.
15862       if (!N->hasAnyUseOfValue(0)) {
15863         // It's not safe to use the two value CombineTo variant here. e.g.
15864         // v1, chain2 = load chain1, loc
15865         // v2, chain3 = load chain2, loc
15866         // v3         = add v2, c
15867         // Now we replace use of chain2 with chain1.  This makes the second load
15868         // isomorphic to the one we are deleting, and thus makes this load live.
15869         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15870                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15871                    dbgs() << "\n");
15872         WorklistRemover DeadNodes(*this);
15873         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15874         AddUsersToWorklist(Chain.getNode());
15875         if (N->use_empty())
15876           deleteAndRecombine(N);
15877
15878         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15879       }
15880     } else {
15881       // Indexed loads.
15882       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15883
15884       // If this load has an opaque TargetConstant offset, then we cannot split
15885       // the indexing into an add/sub directly (that TargetConstant may not be
15886       // valid for a different type of node, and we cannot convert an opaque
15887       // target constant into a regular constant).
15888       bool CanSplitIdx = canSplitIdx(LD);
15889
15890       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15891         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15892         SDValue Index;
15893         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15894           Index = SplitIndexingFromLoad(LD);
15895           // Try to fold the base pointer arithmetic into subsequent loads and
15896           // stores.
15897           AddUsersToWorklist(N);
15898         } else
15899           Index = DAG.getUNDEF(N->getValueType(1));
15900         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15901                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15902                    dbgs() << " and 2 other values\n");
15903         WorklistRemover DeadNodes(*this);
15904         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15905         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15906         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15907         deleteAndRecombine(N);
15908         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15909       }
15910     }
15911   }
15912
15913   // If this load is directly stored, replace the load value with the stored
15914   // value.
15915   if (auto V = ForwardStoreValueToDirectLoad(LD))
15916     return V;
15917
15918   // Try to infer better alignment information than the load already has.
15919   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15920     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15921       if (*Alignment > LD->getAlign() &&
15922           isAligned(*Alignment, LD->getSrcValueOffset())) {
15923         SDValue NewLoad = DAG.getExtLoad(
15924             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15925             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15926             LD->getMemOperand()->getFlags(), LD->getAAInfo());
15927         // NewLoad will always be N as we are only refining the alignment
15928         assert(NewLoad.getNode() == N);
15929         (void)NewLoad;
15930       }
15931     }
15932   }
15933
15934   if (LD->isUnindexed()) {
15935     // Walk up chain skipping non-aliasing memory nodes.
15936     SDValue BetterChain = FindBetterChain(LD, Chain);
15937
15938     // If there is a better chain.
15939     if (Chain != BetterChain) {
15940       SDValue ReplLoad;
15941
15942       // Replace the chain to void dependency.
15943       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15944         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15945                                BetterChain, Ptr, LD->getMemOperand());
15946       } else {
15947         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15948                                   LD->getValueType(0),
15949                                   BetterChain, Ptr, LD->getMemoryVT(),
15950                                   LD->getMemOperand());
15951       }
15952
15953       // Create token factor to keep old chain connected.
15954       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15955                                   MVT::Other, Chain, ReplLoad.getValue(1));
15956
15957       // Replace uses with load result and token factor
15958       return CombineTo(N, ReplLoad.getValue(0), Token);
15959     }
15960   }
15961
15962   // Try transforming N to an indexed load.
15963   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15964     return SDValue(N, 0);
15965
15966   // Try to slice up N to more direct loads if the slices are mapped to
15967   // different register banks or pairing can take place.
15968   if (SliceUpLoad(N))
15969     return SDValue(N, 0);
15970
15971   return SDValue();
15972 }
15973
15974 namespace {
15975
15976 /// Helper structure used to slice a load in smaller loads.
15977 /// Basically a slice is obtained from the following sequence:
15978 /// Origin = load Ty1, Base
15979 /// Shift = srl Ty1 Origin, CstTy Amount
15980 /// Inst = trunc Shift to Ty2
15981 ///
15982 /// Then, it will be rewritten into:
15983 /// Slice = load SliceTy, Base + SliceOffset
15984 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
15985 ///
15986 /// SliceTy is deduced from the number of bits that are actually used to
15987 /// build Inst.
15988 struct LoadedSlice {
15989   /// Helper structure used to compute the cost of a slice.
15990   struct Cost {
15991     /// Are we optimizing for code size.
15992     bool ForCodeSize = false;
15993
15994     /// Various cost.
15995     unsigned Loads = 0;
15996     unsigned Truncates = 0;
15997     unsigned CrossRegisterBanksCopies = 0;
15998     unsigned ZExts = 0;
15999     unsigned Shift = 0;
16000
16001     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16002
16003     /// Get the cost of one isolated slice.
16004     Cost(const LoadedSlice &LS, bool ForCodeSize)
16005         : ForCodeSize(ForCodeSize), Loads(1) {
16006       EVT TruncType = LS.Inst->getValueType(0);
16007       EVT LoadedType = LS.getLoadedType();
16008       if (TruncType != LoadedType &&
16009           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16010         ZExts = 1;
16011     }
16012
16013     /// Account for slicing gain in the current cost.
16014     /// Slicing provide a few gains like removing a shift or a
16015     /// truncate. This method allows to grow the cost of the original
16016     /// load with the gain from this slice.
16017     void addSliceGain(const LoadedSlice &LS) {
16018       // Each slice saves a truncate.
16019       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16020       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16021                               LS.Inst->getValueType(0)))
16022         ++Truncates;
16023       // If there is a shift amount, this slice gets rid of it.
16024       if (LS.Shift)
16025         ++Shift;
16026       // If this slice can merge a cross register bank copy, account for it.
16027       if (LS.canMergeExpensiveCrossRegisterBankCopy())
16028         ++CrossRegisterBanksCopies;
16029     }
16030
16031     Cost &operator+=(const Cost &RHS) {
16032       Loads += RHS.Loads;
16033       Truncates += RHS.Truncates;
16034       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16035       ZExts += RHS.ZExts;
16036       Shift += RHS.Shift;
16037       return *this;
16038     }
16039
16040     bool operator==(const Cost &RHS) const {
16041       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16042              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16043              ZExts == RHS.ZExts && Shift == RHS.Shift;
16044     }
16045
16046     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16047
16048     bool operator<(const Cost &RHS) const {
16049       // Assume cross register banks copies are as expensive as loads.
16050       // FIXME: Do we want some more target hooks?
16051       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16052       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16053       // Unless we are optimizing for code size, consider the
16054       // expensive operation first.
16055       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16056         return ExpensiveOpsLHS < ExpensiveOpsRHS;
16057       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16058              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16059     }
16060
16061     bool operator>(const Cost &RHS) const { return RHS < *this; }
16062
16063     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16064
16065     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16066   };
16067
16068   // The last instruction that represent the slice. This should be a
16069   // truncate instruction.
16070   SDNode *Inst;
16071
16072   // The original load instruction.
16073   LoadSDNode *Origin;
16074
16075   // The right shift amount in bits from the original load.
16076   unsigned Shift;
16077
16078   // The DAG from which Origin came from.
16079   // This is used to get some contextual information about legal types, etc.
16080   SelectionDAG *DAG;
16081
16082   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16083               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16084       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16085
16086   /// Get the bits used in a chunk of bits \p BitWidth large.
16087   /// \return Result is \p BitWidth and has used bits set to 1 and
16088   ///         not used bits set to 0.
16089   APInt getUsedBits() const {
16090     // Reproduce the trunc(lshr) sequence:
16091     // - Start from the truncated value.
16092     // - Zero extend to the desired bit width.
16093     // - Shift left.
16094     assert(Origin && "No original load to compare against.");
16095     unsigned BitWidth = Origin->getValueSizeInBits(0);
16096     assert(Inst && "This slice is not bound to an instruction");
16097     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16098            "Extracted slice is bigger than the whole type!");
16099     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16100     UsedBits.setAllBits();
16101     UsedBits = UsedBits.zext(BitWidth);
16102     UsedBits <<= Shift;
16103     return UsedBits;
16104   }
16105
16106   /// Get the size of the slice to be loaded in bytes.
16107   unsigned getLoadedSize() const {
16108     unsigned SliceSize = getUsedBits().countPopulation();
16109     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16110     return SliceSize / 8;
16111   }
16112
16113   /// Get the type that will be loaded for this slice.
16114   /// Note: This may not be the final type for the slice.
16115   EVT getLoadedType() const {
16116     assert(DAG && "Missing context");
16117     LLVMContext &Ctxt = *DAG->getContext();
16118     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16119   }
16120
16121   /// Get the alignment of the load used for this slice.
16122   Align getAlign() const {
16123     Align Alignment = Origin->getAlign();
16124     uint64_t Offset = getOffsetFromBase();
16125     if (Offset != 0)
16126       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16127     return Alignment;
16128   }
16129
16130   /// Check if this slice can be rewritten with legal operations.
16131   bool isLegal() const {
16132     // An invalid slice is not legal.
16133     if (!Origin || !Inst || !DAG)
16134       return false;
16135
16136     // Offsets are for indexed load only, we do not handle that.
16137     if (!Origin->getOffset().isUndef())
16138       return false;
16139
16140     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16141
16142     // Check that the type is legal.
16143     EVT SliceType = getLoadedType();
16144     if (!TLI.isTypeLegal(SliceType))
16145       return false;
16146
16147     // Check that the load is legal for this type.
16148     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16149       return false;
16150
16151     // Check that the offset can be computed.
16152     // 1. Check its type.
16153     EVT PtrType = Origin->getBasePtr().getValueType();
16154     if (PtrType == MVT::Untyped || PtrType.isExtended())
16155       return false;
16156
16157     // 2. Check that it fits in the immediate.
16158     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16159       return false;
16160
16161     // 3. Check that the computation is legal.
16162     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16163       return false;
16164
16165     // Check that the zext is legal if it needs one.
16166     EVT TruncateType = Inst->getValueType(0);
16167     if (TruncateType != SliceType &&
16168         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16169       return false;
16170
16171     return true;
16172   }
16173
16174   /// Get the offset in bytes of this slice in the original chunk of
16175   /// bits.
16176   /// \pre DAG != nullptr.
16177   uint64_t getOffsetFromBase() const {
16178     assert(DAG && "Missing context.");
16179     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
16180     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
16181     uint64_t Offset = Shift / 8;
16182     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16183     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16184            "The size of the original loaded type is not a multiple of a"
16185            " byte.");
16186     // If Offset is bigger than TySizeInBytes, it means we are loading all
16187     // zeros. This should have been optimized before in the process.
16188     assert(TySizeInBytes > Offset &&
16189            "Invalid shift amount for given loaded size");
16190     if (IsBigEndian)
16191       Offset = TySizeInBytes - Offset - getLoadedSize();
16192     return Offset;
16193   }
16194
16195   /// Generate the sequence of instructions to load the slice
16196   /// represented by this object and redirect the uses of this slice to
16197   /// this new sequence of instructions.
16198   /// \pre this->Inst && this->Origin are valid Instructions and this
16199   /// object passed the legal check: LoadedSlice::isLegal returned true.
16200   /// \return The last instruction of the sequence used to load the slice.
16201   SDValue loadSlice() const {
16202     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16203     const SDValue &OldBaseAddr = Origin->getBasePtr();
16204     SDValue BaseAddr = OldBaseAddr;
16205     // Get the offset in that chunk of bytes w.r.t. the endianness.
16206     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16207     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16208     if (Offset) {
16209       // BaseAddr = BaseAddr + Offset.
16210       EVT ArithType = BaseAddr.getValueType();
16211       SDLoc DL(Origin);
16212       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16213                               DAG->getConstant(Offset, DL, ArithType));
16214     }
16215
16216     // Create the type of the loaded slice according to its size.
16217     EVT SliceType = getLoadedType();
16218
16219     // Create the load for the slice.
16220     SDValue LastInst =
16221         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16222                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16223                      Origin->getMemOperand()->getFlags());
16224     // If the final type is not the same as the loaded type, this means that
16225     // we have to pad with zero. Create a zero extend for that.
16226     EVT FinalType = Inst->getValueType(0);
16227     if (SliceType != FinalType)
16228       LastInst =
16229           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16230     return LastInst;
16231   }
16232
16233   /// Check if this slice can be merged with an expensive cross register
16234   /// bank copy. E.g.,
16235   /// i = load i32
16236   /// f = bitcast i32 i to float
16237   bool canMergeExpensiveCrossRegisterBankCopy() const {
16238     if (!Inst || !Inst->hasOneUse())
16239       return false;
16240     SDNode *Use = *Inst->use_begin();
16241     if (Use->getOpcode() != ISD::BITCAST)
16242       return false;
16243     assert(DAG && "Missing context");
16244     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16245     EVT ResVT = Use->getValueType(0);
16246     const TargetRegisterClass *ResRC =
16247         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16248     const TargetRegisterClass *ArgRC =
16249         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16250                            Use->getOperand(0)->isDivergent());
16251     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16252       return false;
16253
16254     // At this point, we know that we perform a cross-register-bank copy.
16255     // Check if it is expensive.
16256     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
16257     // Assume bitcasts are cheap, unless both register classes do not
16258     // explicitly share a common sub class.
16259     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16260       return false;
16261
16262     // Check if it will be merged with the load.
16263     // 1. Check the alignment constraint.
16264     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
16265         ResVT.getTypeForEVT(*DAG->getContext()));
16266
16267     if (RequiredAlignment > getAlign())
16268       return false;
16269
16270     // 2. Check that the load is a legal operation for that type.
16271     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16272       return false;
16273
16274     // 3. Check that we do not have a zext in the way.
16275     if (Inst->getValueType(0) != getLoadedType())
16276       return false;
16277
16278     return true;
16279   }
16280 };
16281
16282 } // end anonymous namespace
16283
16284 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16285 /// \p UsedBits looks like 0..0 1..1 0..0.
16286 static bool areUsedBitsDense(const APInt &UsedBits) {
16287   // If all the bits are one, this is dense!
16288   if (UsedBits.isAllOnesValue())
16289     return true;
16290
16291   // Get rid of the unused bits on the right.
16292   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16293   // Get rid of the unused bits on the left.
16294   if (NarrowedUsedBits.countLeadingZeros())
16295     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16296   // Check that the chunk of bits is completely used.
16297   return NarrowedUsedBits.isAllOnesValue();
16298 }
16299
16300 /// Check whether or not \p First and \p Second are next to each other
16301 /// in memory. This means that there is no hole between the bits loaded
16302 /// by \p First and the bits loaded by \p Second.
16303 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16304                                      const LoadedSlice &Second) {
16305   assert(First.Origin == Second.Origin && First.Origin &&
16306          "Unable to match different memory origins.");
16307   APInt UsedBits = First.getUsedBits();
16308   assert((UsedBits & Second.getUsedBits()) == 0 &&
16309          "Slices are not supposed to overlap.");
16310   UsedBits |= Second.getUsedBits();
16311   return areUsedBitsDense(UsedBits);
16312 }
16313
16314 /// Adjust the \p GlobalLSCost according to the target
16315 /// paring capabilities and the layout of the slices.
16316 /// \pre \p GlobalLSCost should account for at least as many loads as
16317 /// there is in the slices in \p LoadedSlices.
16318 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16319                                  LoadedSlice::Cost &GlobalLSCost) {
16320   unsigned NumberOfSlices = LoadedSlices.size();
16321   // If there is less than 2 elements, no pairing is possible.
16322   if (NumberOfSlices < 2)
16323     return;
16324
16325   // Sort the slices so that elements that are likely to be next to each
16326   // other in memory are next to each other in the list.
16327   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16328     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16329     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16330   });
16331   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
16332   // First (resp. Second) is the first (resp. Second) potentially candidate
16333   // to be placed in a paired load.
16334   const LoadedSlice *First = nullptr;
16335   const LoadedSlice *Second = nullptr;
16336   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16337                 // Set the beginning of the pair.
16338                                                            First = Second) {
16339     Second = &LoadedSlices[CurrSlice];
16340
16341     // If First is NULL, it means we start a new pair.
16342     // Get to the next slice.
16343     if (!First)
16344       continue;
16345
16346     EVT LoadedType = First->getLoadedType();
16347
16348     // If the types of the slices are different, we cannot pair them.
16349     if (LoadedType != Second->getLoadedType())
16350       continue;
16351
16352     // Check if the target supplies paired loads for this type.
16353     Align RequiredAlignment;
16354     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
16355       // move to the next pair, this type is hopeless.
16356       Second = nullptr;
16357       continue;
16358     }
16359     // Check if we meet the alignment requirement.
16360     if (First->getAlign() < RequiredAlignment)
16361       continue;
16362
16363     // Check that both loads are next to each other in memory.
16364     if (!areSlicesNextToEachOther(*First, *Second))
16365       continue;
16366
16367     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16368     --GlobalLSCost.Loads;
16369     // Move to the next pair.
16370     Second = nullptr;
16371   }
16372 }
16373
16374 /// Check the profitability of all involved LoadedSlice.
16375 /// Currently, it is considered profitable if there is exactly two
16376 /// involved slices (1) which are (2) next to each other in memory, and
16377 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16378 ///
16379 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16380 /// the elements themselves.
16381 ///
16382 /// FIXME: When the cost model will be mature enough, we can relax
16383 /// constraints (1) and (2).
16384 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16385                                 const APInt &UsedBits, bool ForCodeSize) {
16386   unsigned NumberOfSlices = LoadedSlices.size();
16387   if (StressLoadSlicing)
16388     return NumberOfSlices > 1;
16389
16390   // Check (1).
16391   if (NumberOfSlices != 2)
16392     return false;
16393
16394   // Check (2).
16395   if (!areUsedBitsDense(UsedBits))
16396     return false;
16397
16398   // Check (3).
16399   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16400   // The original code has one big load.
16401   OrigCost.Loads = 1;
16402   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16403     const LoadedSlice &LS = LoadedSlices[CurrSlice];
16404     // Accumulate the cost of all the slices.
16405     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16406     GlobalSlicingCost += SliceCost;
16407
16408     // Account as cost in the original configuration the gain obtained
16409     // with the current slices.
16410     OrigCost.addSliceGain(LS);
16411   }
16412
16413   // If the target supports paired load, adjust the cost accordingly.
16414   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16415   return OrigCost > GlobalSlicingCost;
16416 }
16417
16418 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
16419 /// operations, split it in the various pieces being extracted.
16420 ///
16421 /// This sort of thing is introduced by SROA.
16422 /// This slicing takes care not to insert overlapping loads.
16423 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
16424 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16425   if (Level < AfterLegalizeDAG)
16426     return false;
16427
16428   LoadSDNode *LD = cast<LoadSDNode>(N);
16429   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16430       !LD->getValueType(0).isInteger())
16431     return false;
16432
16433   // The algorithm to split up a load of a scalable vector into individual
16434   // elements currently requires knowing the length of the loaded type,
16435   // so will need adjusting to work on scalable vectors.
16436   if (LD->getValueType(0).isScalableVector())
16437     return false;
16438
16439   // Keep track of already used bits to detect overlapping values.
16440   // In that case, we will just abort the transformation.
16441   APInt UsedBits(LD->getValueSizeInBits(0), 0);
16442
16443   SmallVector<LoadedSlice, 4> LoadedSlices;
16444
16445   // Check if this load is used as several smaller chunks of bits.
16446   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16447   // of computation for each trunc.
16448   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16449        UI != UIEnd; ++UI) {
16450     // Skip the uses of the chain.
16451     if (UI.getUse().getResNo() != 0)
16452       continue;
16453
16454     SDNode *User = *UI;
16455     unsigned Shift = 0;
16456
16457     // Check if this is a trunc(lshr).
16458     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16459         isa<ConstantSDNode>(User->getOperand(1))) {
16460       Shift = User->getConstantOperandVal(1);
16461       User = *User->use_begin();
16462     }
16463
16464     // At this point, User is a Truncate, iff we encountered, trunc or
16465     // trunc(lshr).
16466     if (User->getOpcode() != ISD::TRUNCATE)
16467       return false;
16468
16469     // The width of the type must be a power of 2 and greater than 8-bits.
16470     // Otherwise the load cannot be represented in LLVM IR.
16471     // Moreover, if we shifted with a non-8-bits multiple, the slice
16472     // will be across several bytes. We do not support that.
16473     unsigned Width = User->getValueSizeInBits(0);
16474     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16475       return false;
16476
16477     // Build the slice for this chain of computations.
16478     LoadedSlice LS(User, LD, Shift, &DAG);
16479     APInt CurrentUsedBits = LS.getUsedBits();
16480
16481     // Check if this slice overlaps with another.
16482     if ((CurrentUsedBits & UsedBits) != 0)
16483       return false;
16484     // Update the bits used globally.
16485     UsedBits |= CurrentUsedBits;
16486
16487     // Check if the new slice would be legal.
16488     if (!LS.isLegal())
16489       return false;
16490
16491     // Record the slice.
16492     LoadedSlices.push_back(LS);
16493   }
16494
16495   // Abort slicing if it does not seem to be profitable.
16496   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16497     return false;
16498
16499   ++SlicedLoads;
16500
16501   // Rewrite each chain to use an independent load.
16502   // By construction, each chain can be represented by a unique load.
16503
16504   // Prepare the argument for the new token factor for all the slices.
16505   SmallVector<SDValue, 8> ArgChains;
16506   for (const LoadedSlice &LS : LoadedSlices) {
16507     SDValue SliceInst = LS.loadSlice();
16508     CombineTo(LS.Inst, SliceInst, true);
16509     if (SliceInst.getOpcode() != ISD::LOAD)
16510       SliceInst = SliceInst.getOperand(0);
16511     assert(SliceInst->getOpcode() == ISD::LOAD &&
16512            "It takes more than a zext to get to the loaded slice!!");
16513     ArgChains.push_back(SliceInst.getValue(1));
16514   }
16515
16516   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16517                               ArgChains);
16518   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16519   AddToWorklist(Chain.getNode());
16520   return true;
16521 }
16522
16523 /// Check to see if V is (and load (ptr), imm), where the load is having
16524 /// specific bytes cleared out.  If so, return the byte size being masked out
16525 /// and the shift amount.
16526 static std::pair<unsigned, unsigned>
16527 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16528   std::pair<unsigned, unsigned> Result(0, 0);
16529
16530   // Check for the structure we're looking for.
16531   if (V->getOpcode() != ISD::AND ||
16532       !isa<ConstantSDNode>(V->getOperand(1)) ||
16533       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16534     return Result;
16535
16536   // Check the chain and pointer.
16537   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16538   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16539
16540   // This only handles simple types.
16541   if (V.getValueType() != MVT::i16 &&
16542       V.getValueType() != MVT::i32 &&
16543       V.getValueType() != MVT::i64)
16544     return Result;
16545
16546   // Check the constant mask.  Invert it so that the bits being masked out are
16547   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16548   // follow the sign bit for uniformity.
16549   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16550   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16551   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16552   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16553   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16554   if (NotMaskLZ == 64) return Result;  // All zero mask.
16555
16556   // See if we have a continuous run of bits.  If so, we have 0*1+0*
16557   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16558     return Result;
16559
16560   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16561   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16562     NotMaskLZ -= 64-V.getValueSizeInBits();
16563
16564   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16565   switch (MaskedBytes) {
16566   case 1:
16567   case 2:
16568   case 4: break;
16569   default: return Result; // All one mask, or 5-byte mask.
16570   }
16571
16572   // Verify that the first bit starts at a multiple of mask so that the access
16573   // is aligned the same as the access width.
16574   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16575
16576   // For narrowing to be valid, it must be the case that the load the
16577   // immediately preceding memory operation before the store.
16578   if (LD == Chain.getNode())
16579     ; // ok.
16580   else if (Chain->getOpcode() == ISD::TokenFactor &&
16581            SDValue(LD, 1).hasOneUse()) {
16582     // LD has only 1 chain use so they are no indirect dependencies.
16583     if (!LD->isOperandOf(Chain.getNode()))
16584       return Result;
16585   } else
16586     return Result; // Fail.
16587
16588   Result.first = MaskedBytes;
16589   Result.second = NotMaskTZ/8;
16590   return Result;
16591 }
16592
16593 /// Check to see if IVal is something that provides a value as specified by
16594 /// MaskInfo. If so, replace the specified store with a narrower store of
16595 /// truncated IVal.
16596 static SDValue
16597 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16598                                 SDValue IVal, StoreSDNode *St,
16599                                 DAGCombiner *DC) {
16600   unsigned NumBytes = MaskInfo.first;
16601   unsigned ByteShift = MaskInfo.second;
16602   SelectionDAG &DAG = DC->getDAG();
16603
16604   // Check to see if IVal is all zeros in the part being masked in by the 'or'
16605   // that uses this.  If not, this is not a replacement.
16606   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16607                                   ByteShift*8, (ByteShift+NumBytes)*8);
16608   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16609
16610   // Check that it is legal on the target to do this.  It is legal if the new
16611   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16612   // legalization (and the target doesn't explicitly think this is a bad idea).
16613   MVT VT = MVT::getIntegerVT(NumBytes * 8);
16614   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16615   if (!DC->isTypeLegal(VT))
16616     return SDValue();
16617   if (St->getMemOperand() &&
16618       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16619                               *St->getMemOperand()))
16620     return SDValue();
16621
16622   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
16623   // shifted by ByteShift and truncated down to NumBytes.
16624   if (ByteShift) {
16625     SDLoc DL(IVal);
16626     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16627                        DAG.getConstant(ByteShift*8, DL,
16628                                     DC->getShiftAmountTy(IVal.getValueType())));
16629   }
16630
16631   // Figure out the offset for the store and the alignment of the access.
16632   unsigned StOffset;
16633   if (DAG.getDataLayout().isLittleEndian())
16634     StOffset = ByteShift;
16635   else
16636     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16637
16638   SDValue Ptr = St->getBasePtr();
16639   if (StOffset) {
16640     SDLoc DL(IVal);
16641     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16642   }
16643
16644   // Truncate down to the new size.
16645   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16646
16647   ++OpsNarrowed;
16648   return DAG
16649       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16650                 St->getPointerInfo().getWithOffset(StOffset),
16651                 St->getOriginalAlign());
16652 }
16653
16654 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16655 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16656 /// narrowing the load and store if it would end up being a win for performance
16657 /// or code size.
16658 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16659   StoreSDNode *ST  = cast<StoreSDNode>(N);
16660   if (!ST->isSimple())
16661     return SDValue();
16662
16663   SDValue Chain = ST->getChain();
16664   SDValue Value = ST->getValue();
16665   SDValue Ptr   = ST->getBasePtr();
16666   EVT VT = Value.getValueType();
16667
16668   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16669     return SDValue();
16670
16671   unsigned Opc = Value.getOpcode();
16672
16673   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16674   // is a byte mask indicating a consecutive number of bytes, check to see if
16675   // Y is known to provide just those bytes.  If so, we try to replace the
16676   // load + replace + store sequence with a single (narrower) store, which makes
16677   // the load dead.
16678   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16679     std::pair<unsigned, unsigned> MaskedLoad;
16680     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16681     if (MaskedLoad.first)
16682       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16683                                                   Value.getOperand(1), ST,this))
16684         return NewST;
16685
16686     // Or is commutative, so try swapping X and Y.
16687     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16688     if (MaskedLoad.first)
16689       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16690                                                   Value.getOperand(0), ST,this))
16691         return NewST;
16692   }
16693
16694   if (!EnableReduceLoadOpStoreWidth)
16695     return SDValue();
16696
16697   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16698       Value.getOperand(1).getOpcode() != ISD::Constant)
16699     return SDValue();
16700
16701   SDValue N0 = Value.getOperand(0);
16702   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16703       Chain == SDValue(N0.getNode(), 1)) {
16704     LoadSDNode *LD = cast<LoadSDNode>(N0);
16705     if (LD->getBasePtr() != Ptr ||
16706         LD->getPointerInfo().getAddrSpace() !=
16707         ST->getPointerInfo().getAddrSpace())
16708       return SDValue();
16709
16710     // Find the type to narrow it the load / op / store to.
16711     SDValue N1 = Value.getOperand(1);
16712     unsigned BitWidth = N1.getValueSizeInBits();
16713     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16714     if (Opc == ISD::AND)
16715       Imm ^= APInt::getAllOnesValue(BitWidth);
16716     if (Imm == 0 || Imm.isAllOnesValue())
16717       return SDValue();
16718     unsigned ShAmt = Imm.countTrailingZeros();
16719     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16720     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16721     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16722     // The narrowing should be profitable, the load/store operation should be
16723     // legal (or custom) and the store size should be equal to the NewVT width.
16724     while (NewBW < BitWidth &&
16725            (NewVT.getStoreSizeInBits() != NewBW ||
16726             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16727             !TLI.isNarrowingProfitable(VT, NewVT))) {
16728       NewBW = NextPowerOf2(NewBW);
16729       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16730     }
16731     if (NewBW >= BitWidth)
16732       return SDValue();
16733
16734     // If the lsb changed does not start at the type bitwidth boundary,
16735     // start at the previous one.
16736     if (ShAmt % NewBW)
16737       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16738     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16739                                    std::min(BitWidth, ShAmt + NewBW));
16740     if ((Imm & Mask) == Imm) {
16741       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16742       if (Opc == ISD::AND)
16743         NewImm ^= APInt::getAllOnesValue(NewBW);
16744       uint64_t PtrOff = ShAmt / 8;
16745       // For big endian targets, we need to adjust the offset to the pointer to
16746       // load the correct bytes.
16747       if (DAG.getDataLayout().isBigEndian())
16748         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16749
16750       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16751       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16752       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16753         return SDValue();
16754
16755       SDValue NewPtr =
16756           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16757       SDValue NewLD =
16758           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16759                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16760                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
16761       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16762                                    DAG.getConstant(NewImm, SDLoc(Value),
16763                                                    NewVT));
16764       SDValue NewST =
16765           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16766                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16767
16768       AddToWorklist(NewPtr.getNode());
16769       AddToWorklist(NewLD.getNode());
16770       AddToWorklist(NewVal.getNode());
16771       WorklistRemover DeadNodes(*this);
16772       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16773       ++OpsNarrowed;
16774       return NewST;
16775     }
16776   }
16777
16778   return SDValue();
16779 }
16780
16781 /// For a given floating point load / store pair, if the load value isn't used
16782 /// by any other operations, then consider transforming the pair to integer
16783 /// load / store operations if the target deems the transformation profitable.
16784 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16785   StoreSDNode *ST  = cast<StoreSDNode>(N);
16786   SDValue Value = ST->getValue();
16787   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16788       Value.hasOneUse()) {
16789     LoadSDNode *LD = cast<LoadSDNode>(Value);
16790     EVT VT = LD->getMemoryVT();
16791     if (!VT.isFloatingPoint() ||
16792         VT != ST->getMemoryVT() ||
16793         LD->isNonTemporal() ||
16794         ST->isNonTemporal() ||
16795         LD->getPointerInfo().getAddrSpace() != 0 ||
16796         ST->getPointerInfo().getAddrSpace() != 0)
16797       return SDValue();
16798
16799     TypeSize VTSize = VT.getSizeInBits();
16800
16801     // We don't know the size of scalable types at compile time so we cannot
16802     // create an integer of the equivalent size.
16803     if (VTSize.isScalable())
16804       return SDValue();
16805
16806     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16807     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16808         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16809         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16810         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16811       return SDValue();
16812
16813     Align LDAlign = LD->getAlign();
16814     Align STAlign = ST->getAlign();
16815     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16816     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16817     if (LDAlign < ABIAlign || STAlign < ABIAlign)
16818       return SDValue();
16819
16820     SDValue NewLD =
16821         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16822                     LD->getPointerInfo(), LDAlign);
16823
16824     SDValue NewST =
16825         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16826                      ST->getPointerInfo(), STAlign);
16827
16828     AddToWorklist(NewLD.getNode());
16829     AddToWorklist(NewST.getNode());
16830     WorklistRemover DeadNodes(*this);
16831     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16832     ++LdStFP2Int;
16833     return NewST;
16834   }
16835
16836   return SDValue();
16837 }
16838
16839 // This is a helper function for visitMUL to check the profitability
16840 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16841 // MulNode is the original multiply, AddNode is (add x, c1),
16842 // and ConstNode is c2.
16843 //
16844 // If the (add x, c1) has multiple uses, we could increase
16845 // the number of adds if we make this transformation.
16846 // It would only be worth doing this if we can remove a
16847 // multiply in the process. Check for that here.
16848 // To illustrate:
16849 //     (A + c1) * c3
16850 //     (A + c2) * c3
16851 // We're checking for cases where we have common "c3 * A" expressions.
16852 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16853                                               SDValue &AddNode,
16854                                               SDValue &ConstNode) {
16855   APInt Val;
16856
16857   // If the add only has one use, and the target thinks the folding is
16858   // profitable or does not lead to worse code, this would be OK to do.
16859   if (AddNode.getNode()->hasOneUse() &&
16860       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
16861     return true;
16862
16863   // Walk all the users of the constant with which we're multiplying.
16864   for (SDNode *Use : ConstNode->uses()) {
16865     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16866       continue;
16867
16868     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16869       SDNode *OtherOp;
16870       SDNode *MulVar = AddNode.getOperand(0).getNode();
16871
16872       // OtherOp is what we're multiplying against the constant.
16873       if (Use->getOperand(0) == ConstNode)
16874         OtherOp = Use->getOperand(1).getNode();
16875       else
16876         OtherOp = Use->getOperand(0).getNode();
16877
16878       // Check to see if multiply is with the same operand of our "add".
16879       //
16880       //     ConstNode  = CONST
16881       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16882       //     ...
16883       //     AddNode  = (A + c1)  <-- MulVar is A.
16884       //         = AddNode * ConstNode   <-- current visiting instruction.
16885       //
16886       // If we make this transformation, we will have a common
16887       // multiply (ConstNode * A) that we can save.
16888       if (OtherOp == MulVar)
16889         return true;
16890
16891       // Now check to see if a future expansion will give us a common
16892       // multiply.
16893       //
16894       //     ConstNode  = CONST
16895       //     AddNode    = (A + c1)
16896       //     ...   = AddNode * ConstNode <-- current visiting instruction.
16897       //     ...
16898       //     OtherOp = (A + c2)
16899       //     Use     = OtherOp * ConstNode <-- visiting Use.
16900       //
16901       // If we make this transformation, we will have a common
16902       // multiply (CONST * A) after we also do the same transformation
16903       // to the "t2" instruction.
16904       if (OtherOp->getOpcode() == ISD::ADD &&
16905           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16906           OtherOp->getOperand(0).getNode() == MulVar)
16907         return true;
16908     }
16909   }
16910
16911   // Didn't find a case where this would be profitable.
16912   return false;
16913 }
16914
16915 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16916                                          unsigned NumStores) {
16917   SmallVector<SDValue, 8> Chains;
16918   SmallPtrSet<const SDNode *, 8> Visited;
16919   SDLoc StoreDL(StoreNodes[0].MemNode);
16920
16921   for (unsigned i = 0; i < NumStores; ++i) {
16922     Visited.insert(StoreNodes[i].MemNode);
16923   }
16924
16925   // don't include nodes that are children or repeated nodes.
16926   for (unsigned i = 0; i < NumStores; ++i) {
16927     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16928       Chains.push_back(StoreNodes[i].MemNode->getChain());
16929   }
16930
16931   assert(Chains.size() > 0 && "Chain should have generated a chain");
16932   return DAG.getTokenFactor(StoreDL, Chains);
16933 }
16934
16935 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16936     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16937     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16938   // Make sure we have something to merge.
16939   if (NumStores < 2)
16940     return false;
16941
16942   assert((!UseTrunc || !UseVector) &&
16943          "This optimization cannot emit a vector truncating store");
16944
16945   // The latest Node in the DAG.
16946   SDLoc DL(StoreNodes[0].MemNode);
16947
16948   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16949   unsigned SizeInBits = NumStores * ElementSizeBits;
16950   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16951
16952   EVT StoreTy;
16953   if (UseVector) {
16954     unsigned Elts = NumStores * NumMemElts;
16955     // Get the type for the merged vector store.
16956     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16957   } else
16958     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16959
16960   SDValue StoredVal;
16961   if (UseVector) {
16962     if (IsConstantSrc) {
16963       SmallVector<SDValue, 8> BuildVector;
16964       for (unsigned I = 0; I != NumStores; ++I) {
16965         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16966         SDValue Val = St->getValue();
16967         // If constant is of the wrong type, convert it now.
16968         if (MemVT != Val.getValueType()) {
16969           Val = peekThroughBitcasts(Val);
16970           // Deal with constants of wrong size.
16971           if (ElementSizeBits != Val.getValueSizeInBits()) {
16972             EVT IntMemVT =
16973                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16974             if (isa<ConstantFPSDNode>(Val)) {
16975               // Not clear how to truncate FP values.
16976               return false;
16977             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16978               Val = DAG.getConstant(C->getAPIntValue()
16979                                         .zextOrTrunc(Val.getValueSizeInBits())
16980                                         .zextOrTrunc(ElementSizeBits),
16981                                     SDLoc(C), IntMemVT);
16982           }
16983           // Make sure correctly size type is the correct type.
16984           Val = DAG.getBitcast(MemVT, Val);
16985         }
16986         BuildVector.push_back(Val);
16987       }
16988       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16989                                                : ISD::BUILD_VECTOR,
16990                               DL, StoreTy, BuildVector);
16991     } else {
16992       SmallVector<SDValue, 8> Ops;
16993       for (unsigned i = 0; i < NumStores; ++i) {
16994         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16995         SDValue Val = peekThroughBitcasts(St->getValue());
16996         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16997         // type MemVT. If the underlying value is not the correct
16998         // type, but it is an extraction of an appropriate vector we
16999         // can recast Val to be of the correct type. This may require
17000         // converting between EXTRACT_VECTOR_ELT and
17001         // EXTRACT_SUBVECTOR.
17002         if ((MemVT != Val.getValueType()) &&
17003             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17004              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17005           EVT MemVTScalarTy = MemVT.getScalarType();
17006           // We may need to add a bitcast here to get types to line up.
17007           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17008             Val = DAG.getBitcast(MemVT, Val);
17009           } else {
17010             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17011                                             : ISD::EXTRACT_VECTOR_ELT;
17012             SDValue Vec = Val.getOperand(0);
17013             SDValue Idx = Val.getOperand(1);
17014             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17015           }
17016         }
17017         Ops.push_back(Val);
17018       }
17019
17020       // Build the extracted vector elements back into a vector.
17021       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17022                                                : ISD::BUILD_VECTOR,
17023                               DL, StoreTy, Ops);
17024     }
17025   } else {
17026     // We should always use a vector store when merging extracted vector
17027     // elements, so this path implies a store of constants.
17028     assert(IsConstantSrc && "Merged vector elements should use vector store");
17029
17030     APInt StoreInt(SizeInBits, 0);
17031
17032     // Construct a single integer constant which is made of the smaller
17033     // constant inputs.
17034     bool IsLE = DAG.getDataLayout().isLittleEndian();
17035     for (unsigned i = 0; i < NumStores; ++i) {
17036       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17037       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17038
17039       SDValue Val = St->getValue();
17040       Val = peekThroughBitcasts(Val);
17041       StoreInt <<= ElementSizeBits;
17042       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17043         StoreInt |= C->getAPIntValue()
17044                         .zextOrTrunc(ElementSizeBits)
17045                         .zextOrTrunc(SizeInBits);
17046       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17047         StoreInt |= C->getValueAPF()
17048                         .bitcastToAPInt()
17049                         .zextOrTrunc(ElementSizeBits)
17050                         .zextOrTrunc(SizeInBits);
17051         // If fp truncation is necessary give up for now.
17052         if (MemVT.getSizeInBits() != ElementSizeBits)
17053           return false;
17054       } else {
17055         llvm_unreachable("Invalid constant element type");
17056       }
17057     }
17058
17059     // Create the new Load and Store operations.
17060     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17061   }
17062
17063   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17064   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17065
17066   // make sure we use trunc store if it's necessary to be legal.
17067   SDValue NewStore;
17068   if (!UseTrunc) {
17069     NewStore =
17070         DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17071                      FirstInChain->getPointerInfo(), FirstInChain->getAlign());
17072   } else { // Must be realized as a trunc store
17073     EVT LegalizedStoredValTy =
17074         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17075     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17076     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17077     SDValue ExtendedStoreVal =
17078         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17079                         LegalizedStoredValTy);
17080     NewStore = DAG.getTruncStore(
17081         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17082         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17083         FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17084   }
17085
17086   // Replace all merged stores with the new store.
17087   for (unsigned i = 0; i < NumStores; ++i)
17088     CombineTo(StoreNodes[i].MemNode, NewStore);
17089
17090   AddToWorklist(NewChain.getNode());
17091   return true;
17092 }
17093
17094 void DAGCombiner::getStoreMergeCandidates(
17095     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17096     SDNode *&RootNode) {
17097   // This holds the base pointer, index, and the offset in bytes from the base
17098   // pointer. We must have a base and an offset. Do not handle stores to undef
17099   // base pointers.
17100   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17101   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17102     return;
17103
17104   SDValue Val = peekThroughBitcasts(St->getValue());
17105   StoreSource StoreSrc = getStoreSource(Val);
17106   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17107
17108   // Match on loadbaseptr if relevant.
17109   EVT MemVT = St->getMemoryVT();
17110   BaseIndexOffset LBasePtr;
17111   EVT LoadVT;
17112   if (StoreSrc == StoreSource::Load) {
17113     auto *Ld = cast<LoadSDNode>(Val);
17114     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17115     LoadVT = Ld->getMemoryVT();
17116     // Load and store should be the same type.
17117     if (MemVT != LoadVT)
17118       return;
17119     // Loads must only have one use.
17120     if (!Ld->hasNUsesOfValue(1, 0))
17121       return;
17122     // The memory operands must not be volatile/indexed/atomic.
17123     // TODO: May be able to relax for unordered atomics (see D66309)
17124     if (!Ld->isSimple() || Ld->isIndexed())
17125       return;
17126   }
17127   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17128                             int64_t &Offset) -> bool {
17129     // The memory operands must not be volatile/indexed/atomic.
17130     // TODO: May be able to relax for unordered atomics (see D66309)
17131     if (!Other->isSimple() || Other->isIndexed())
17132       return false;
17133     // Don't mix temporal stores with non-temporal stores.
17134     if (St->isNonTemporal() != Other->isNonTemporal())
17135       return false;
17136     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17137     // Allow merging constants of different types as integers.
17138     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17139                                            : Other->getMemoryVT() != MemVT;
17140     switch (StoreSrc) {
17141     case StoreSource::Load: {
17142       if (NoTypeMatch)
17143         return false;
17144       // The Load's Base Ptr must also match.
17145       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17146       if (!OtherLd)
17147         return false;
17148       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17149       if (LoadVT != OtherLd->getMemoryVT())
17150         return false;
17151       // Loads must only have one use.
17152       if (!OtherLd->hasNUsesOfValue(1, 0))
17153         return false;
17154       // The memory operands must not be volatile/indexed/atomic.
17155       // TODO: May be able to relax for unordered atomics (see D66309)
17156       if (!OtherLd->isSimple() || OtherLd->isIndexed())
17157         return false;
17158       // Don't mix temporal loads with non-temporal loads.
17159       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17160         return false;
17161       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17162         return false;
17163       break;
17164     }
17165     case StoreSource::Constant:
17166       if (NoTypeMatch)
17167         return false;
17168       if (!isIntOrFPConstant(OtherBC))
17169         return false;
17170       break;
17171     case StoreSource::Extract:
17172       // Do not merge truncated stores here.
17173       if (Other->isTruncatingStore())
17174         return false;
17175       if (!MemVT.bitsEq(OtherBC.getValueType()))
17176         return false;
17177       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17178           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17179         return false;
17180       break;
17181     default:
17182       llvm_unreachable("Unhandled store source for merging");
17183     }
17184     Ptr = BaseIndexOffset::match(Other, DAG);
17185     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17186   };
17187
17188   // Check if the pair of StoreNode and the RootNode already bail out many
17189   // times which is over the limit in dependence check.
17190   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17191                                         SDNode *RootNode) -> bool {
17192     auto RootCount = StoreRootCountMap.find(StoreNode);
17193     return RootCount != StoreRootCountMap.end() &&
17194            RootCount->second.first == RootNode &&
17195            RootCount->second.second > StoreMergeDependenceLimit;
17196   };
17197
17198   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17199     // This must be a chain use.
17200     if (UseIter.getOperandNo() != 0)
17201       return;
17202     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17203       BaseIndexOffset Ptr;
17204       int64_t PtrDiff;
17205       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17206           !OverLimitInDependenceCheck(OtherStore, RootNode))
17207         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17208     }
17209   };
17210
17211   // We looking for a root node which is an ancestor to all mergable
17212   // stores. We search up through a load, to our root and then down
17213   // through all children. For instance we will find Store{1,2,3} if
17214   // St is Store1, Store2. or Store3 where the root is not a load
17215   // which always true for nonvolatile ops. TODO: Expand
17216   // the search to find all valid candidates through multiple layers of loads.
17217   //
17218   // Root
17219   // |-------|-------|
17220   // Load    Load    Store3
17221   // |       |
17222   // Store1   Store2
17223   //
17224   // FIXME: We should be able to climb and
17225   // descend TokenFactors to find candidates as well.
17226
17227   RootNode = St->getChain().getNode();
17228
17229   unsigned NumNodesExplored = 0;
17230   const unsigned MaxSearchNodes = 1024;
17231   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17232     RootNode = Ldn->getChain().getNode();
17233     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17234          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17235       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17236         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17237           TryToAddCandidate(I2);
17238       }
17239     }
17240   } else {
17241     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17242          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17243       TryToAddCandidate(I);
17244   }
17245 }
17246
17247 // We need to check that merging these stores does not cause a loop in
17248 // the DAG. Any store candidate may depend on another candidate
17249 // indirectly through its operand (we already consider dependencies
17250 // through the chain). Check in parallel by searching up from
17251 // non-chain operands of candidates.
17252 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17253     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17254     SDNode *RootNode) {
17255   // FIXME: We should be able to truncate a full search of
17256   // predecessors by doing a BFS and keeping tabs the originating
17257   // stores from which worklist nodes come from in a similar way to
17258   // TokenFactor simplfication.
17259
17260   SmallPtrSet<const SDNode *, 32> Visited;
17261   SmallVector<const SDNode *, 8> Worklist;
17262
17263   // RootNode is a predecessor to all candidates so we need not search
17264   // past it. Add RootNode (peeking through TokenFactors). Do not count
17265   // these towards size check.
17266
17267   Worklist.push_back(RootNode);
17268   while (!Worklist.empty()) {
17269     auto N = Worklist.pop_back_val();
17270     if (!Visited.insert(N).second)
17271       continue; // Already present in Visited.
17272     if (N->getOpcode() == ISD::TokenFactor) {
17273       for (SDValue Op : N->ops())
17274         Worklist.push_back(Op.getNode());
17275     }
17276   }
17277
17278   // Don't count pruning nodes towards max.
17279   unsigned int Max = 1024 + Visited.size();
17280   // Search Ops of store candidates.
17281   for (unsigned i = 0; i < NumStores; ++i) {
17282     SDNode *N = StoreNodes[i].MemNode;
17283     // Of the 4 Store Operands:
17284     //   * Chain (Op 0) -> We have already considered these
17285     //                    in candidate selection and can be
17286     //                    safely ignored
17287     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
17288     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
17289     //                       but aren't necessarily fromt the same base node, so
17290     //                       cycles possible (e.g. via indexed store).
17291     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
17292     //               non-indexed stores). Not constant on all targets (e.g. ARM)
17293     //               and so can participate in a cycle.
17294     for (unsigned j = 1; j < N->getNumOperands(); ++j)
17295       Worklist.push_back(N->getOperand(j).getNode());
17296   }
17297   // Search through DAG. We can stop early if we find a store node.
17298   for (unsigned i = 0; i < NumStores; ++i)
17299     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17300                                      Max)) {
17301       // If the searching bail out, record the StoreNode and RootNode in the
17302       // StoreRootCountMap. If we have seen the pair many times over a limit,
17303       // we won't add the StoreNode into StoreNodes set again.
17304       if (Visited.size() >= Max) {
17305         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17306         if (RootCount.first == RootNode)
17307           RootCount.second++;
17308         else
17309           RootCount = {RootNode, 1};
17310       }
17311       return false;
17312     }
17313   return true;
17314 }
17315
17316 unsigned
17317 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17318                                   int64_t ElementSizeBytes) const {
17319   while (true) {
17320     // Find a store past the width of the first store.
17321     size_t StartIdx = 0;
17322     while ((StartIdx + 1 < StoreNodes.size()) &&
17323            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17324               StoreNodes[StartIdx + 1].OffsetFromBase)
17325       ++StartIdx;
17326
17327     // Bail if we don't have enough candidates to merge.
17328     if (StartIdx + 1 >= StoreNodes.size())
17329       return 0;
17330
17331     // Trim stores that overlapped with the first store.
17332     if (StartIdx)
17333       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17334
17335     // Scan the memory operations on the chain and find the first
17336     // non-consecutive store memory address.
17337     unsigned NumConsecutiveStores = 1;
17338     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17339     // Check that the addresses are consecutive starting from the second
17340     // element in the list of stores.
17341     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17342       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17343       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17344         break;
17345       NumConsecutiveStores = i + 1;
17346     }
17347     if (NumConsecutiveStores > 1)
17348       return NumConsecutiveStores;
17349
17350     // There are no consecutive stores at the start of the list.
17351     // Remove the first store and try again.
17352     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17353   }
17354 }
17355
17356 bool DAGCombiner::tryStoreMergeOfConstants(
17357     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17358     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17359   LLVMContext &Context = *DAG.getContext();
17360   const DataLayout &DL = DAG.getDataLayout();
17361   int64_t ElementSizeBytes = MemVT.getStoreSize();
17362   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17363   bool MadeChange = false;
17364
17365   // Store the constants into memory as one consecutive store.
17366   while (NumConsecutiveStores >= 2) {
17367     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17368     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17369     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17370     unsigned LastLegalType = 1;
17371     unsigned LastLegalVectorType = 1;
17372     bool LastIntegerTrunc = false;
17373     bool NonZero = false;
17374     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
17375     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17376       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17377       SDValue StoredVal = ST->getValue();
17378       bool IsElementZero = false;
17379       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17380         IsElementZero = C->isNullValue();
17381       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17382         IsElementZero = C->getConstantFPValue()->isNullValue();
17383       if (IsElementZero) {
17384         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17385           FirstZeroAfterNonZero = i;
17386       }
17387       NonZero |= !IsElementZero;
17388
17389       // Find a legal type for the constant store.
17390       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17391       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17392       bool IsFast = false;
17393
17394       // Break early when size is too large to be legal.
17395       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17396         break;
17397
17398       if (TLI.isTypeLegal(StoreTy) &&
17399           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17400                                DAG.getMachineFunction()) &&
17401           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17402                                  *FirstInChain->getMemOperand(), &IsFast) &&
17403           IsFast) {
17404         LastIntegerTrunc = false;
17405         LastLegalType = i + 1;
17406         // Or check whether a truncstore is legal.
17407       } else if (TLI.getTypeAction(Context, StoreTy) ==
17408                  TargetLowering::TypePromoteInteger) {
17409         EVT LegalizedStoredValTy =
17410             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17411         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17412             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17413                                  DAG.getMachineFunction()) &&
17414             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17415                                    *FirstInChain->getMemOperand(), &IsFast) &&
17416             IsFast) {
17417           LastIntegerTrunc = true;
17418           LastLegalType = i + 1;
17419         }
17420       }
17421
17422       // We only use vectors if the constant is known to be zero or the
17423       // target allows it and the function is not marked with the
17424       // noimplicitfloat attribute.
17425       if ((!NonZero ||
17426            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17427           AllowVectors) {
17428         // Find a legal type for the vector store.
17429         unsigned Elts = (i + 1) * NumMemElts;
17430         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17431         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17432             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17433             TLI.allowsMemoryAccess(Context, DL, Ty,
17434                                    *FirstInChain->getMemOperand(), &IsFast) &&
17435             IsFast)
17436           LastLegalVectorType = i + 1;
17437       }
17438     }
17439
17440     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17441     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17442     bool UseTrunc = LastIntegerTrunc && !UseVector;
17443
17444     // Check if we found a legal integer type that creates a meaningful
17445     // merge.
17446     if (NumElem < 2) {
17447       // We know that candidate stores are in order and of correct
17448       // shape. While there is no mergeable sequence from the
17449       // beginning one may start later in the sequence. The only
17450       // reason a merge of size N could have failed where another of
17451       // the same size would not have, is if the alignment has
17452       // improved or we've dropped a non-zero value. Drop as many
17453       // candidates as we can here.
17454       unsigned NumSkip = 1;
17455       while ((NumSkip < NumConsecutiveStores) &&
17456              (NumSkip < FirstZeroAfterNonZero) &&
17457              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17458         NumSkip++;
17459
17460       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17461       NumConsecutiveStores -= NumSkip;
17462       continue;
17463     }
17464
17465     // Check that we can merge these candidates without causing a cycle.
17466     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17467                                                   RootNode)) {
17468       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17469       NumConsecutiveStores -= NumElem;
17470       continue;
17471     }
17472
17473     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
17474                                                   /*IsConstantSrc*/ true,
17475                                                   UseVector, UseTrunc);
17476
17477     // Remove merged stores for next iteration.
17478     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17479     NumConsecutiveStores -= NumElem;
17480   }
17481   return MadeChange;
17482 }
17483
17484 bool DAGCombiner::tryStoreMergeOfExtracts(
17485     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17486     EVT MemVT, SDNode *RootNode) {
17487   LLVMContext &Context = *DAG.getContext();
17488   const DataLayout &DL = DAG.getDataLayout();
17489   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17490   bool MadeChange = false;
17491
17492   // Loop on Consecutive Stores on success.
17493   while (NumConsecutiveStores >= 2) {
17494     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17495     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17496     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17497     unsigned NumStoresToMerge = 1;
17498     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17499       // Find a legal type for the vector store.
17500       unsigned Elts = (i + 1) * NumMemElts;
17501       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17502       bool IsFast = false;
17503
17504       // Break early when size is too large to be legal.
17505       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17506         break;
17507
17508       if (TLI.isTypeLegal(Ty) &&
17509           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
17510           TLI.allowsMemoryAccess(Context, DL, Ty,
17511                                  *FirstInChain->getMemOperand(), &IsFast) &&
17512           IsFast)
17513         NumStoresToMerge = i + 1;
17514     }
17515
17516     // Check if we found a legal integer type creating a meaningful
17517     // merge.
17518     if (NumStoresToMerge < 2) {
17519       // We know that candidate stores are in order and of correct
17520       // shape. While there is no mergeable sequence from the
17521       // beginning one may start later in the sequence. The only
17522       // reason a merge of size N could have failed where another of
17523       // the same size would not have, is if the alignment has
17524       // improved. Drop as many candidates as we can here.
17525       unsigned NumSkip = 1;
17526       while ((NumSkip < NumConsecutiveStores) &&
17527              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17528         NumSkip++;
17529
17530       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17531       NumConsecutiveStores -= NumSkip;
17532       continue;
17533     }
17534
17535     // Check that we can merge these candidates without causing a cycle.
17536     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17537                                                   RootNode)) {
17538       StoreNodes.erase(StoreNodes.begin(),
17539                        StoreNodes.begin() + NumStoresToMerge);
17540       NumConsecutiveStores -= NumStoresToMerge;
17541       continue;
17542     }
17543
17544     MadeChange |= mergeStoresOfConstantsOrVecElts(
17545         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
17546         /*UseVector*/ true, /*UseTrunc*/ false);
17547
17548     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17549     NumConsecutiveStores -= NumStoresToMerge;
17550   }
17551   return MadeChange;
17552 }
17553
17554 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17555                                        unsigned NumConsecutiveStores, EVT MemVT,
17556                                        SDNode *RootNode, bool AllowVectors,
17557                                        bool IsNonTemporalStore,
17558                                        bool IsNonTemporalLoad) {
17559   LLVMContext &Context = *DAG.getContext();
17560   const DataLayout &DL = DAG.getDataLayout();
17561   int64_t ElementSizeBytes = MemVT.getStoreSize();
17562   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17563   bool MadeChange = false;
17564
17565   // Look for load nodes which are used by the stored values.
17566   SmallVector<MemOpLink, 8> LoadNodes;
17567
17568   // Find acceptable loads. Loads need to have the same chain (token factor),
17569   // must not be zext, volatile, indexed, and they must be consecutive.
17570   BaseIndexOffset LdBasePtr;
17571
17572   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17573     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17574     SDValue Val = peekThroughBitcasts(St->getValue());
17575     LoadSDNode *Ld = cast<LoadSDNode>(Val);
17576
17577     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
17578     // If this is not the first ptr that we check.
17579     int64_t LdOffset = 0;
17580     if (LdBasePtr.getBase().getNode()) {
17581       // The base ptr must be the same.
17582       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17583         break;
17584     } else {
17585       // Check that all other base pointers are the same as this one.
17586       LdBasePtr = LdPtr;
17587     }
17588
17589     // We found a potential memory operand to merge.
17590     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17591   }
17592
17593   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17594     Align RequiredAlignment;
17595     bool NeedRotate = false;
17596     if (LoadNodes.size() == 2) {
17597       // If we have load/store pair instructions and we only have two values,
17598       // don't bother merging.
17599       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17600           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17601         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17602         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17603         break;
17604       }
17605       // If the loads are reversed, see if we can rotate the halves into place.
17606       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17607       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17608       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17609       if (Offset0 - Offset1 == ElementSizeBytes &&
17610           (hasOperation(ISD::ROTL, PairVT) ||
17611            hasOperation(ISD::ROTR, PairVT))) {
17612         std::swap(LoadNodes[0], LoadNodes[1]);
17613         NeedRotate = true;
17614       }
17615     }
17616     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17617     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17618     Align FirstStoreAlign = FirstInChain->getAlign();
17619     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17620
17621     // Scan the memory operations on the chain and find the first
17622     // non-consecutive load memory address. These variables hold the index in
17623     // the store node array.
17624
17625     unsigned LastConsecutiveLoad = 1;
17626
17627     // This variable refers to the size and not index in the array.
17628     unsigned LastLegalVectorType = 1;
17629     unsigned LastLegalIntegerType = 1;
17630     bool isDereferenceable = true;
17631     bool DoIntegerTruncate = false;
17632     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17633     SDValue LoadChain = FirstLoad->getChain();
17634     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17635       // All loads must share the same chain.
17636       if (LoadNodes[i].MemNode->getChain() != LoadChain)
17637         break;
17638
17639       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17640       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17641         break;
17642       LastConsecutiveLoad = i;
17643
17644       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17645         isDereferenceable = false;
17646
17647       // Find a legal type for the vector store.
17648       unsigned Elts = (i + 1) * NumMemElts;
17649       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17650
17651       // Break early when size is too large to be legal.
17652       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17653         break;
17654
17655       bool IsFastSt = false;
17656       bool IsFastLd = false;
17657       if (TLI.isTypeLegal(StoreTy) &&
17658           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17659                                DAG.getMachineFunction()) &&
17660           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17661                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17662           IsFastSt &&
17663           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17664                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17665           IsFastLd) {
17666         LastLegalVectorType = i + 1;
17667       }
17668
17669       // Find a legal type for the integer store.
17670       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17671       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17672       if (TLI.isTypeLegal(StoreTy) &&
17673           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
17674                                DAG.getMachineFunction()) &&
17675           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17676                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17677           IsFastSt &&
17678           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17679                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17680           IsFastLd) {
17681         LastLegalIntegerType = i + 1;
17682         DoIntegerTruncate = false;
17683         // Or check whether a truncstore and extload is legal.
17684       } else if (TLI.getTypeAction(Context, StoreTy) ==
17685                  TargetLowering::TypePromoteInteger) {
17686         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17687         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17688             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
17689                                  DAG.getMachineFunction()) &&
17690             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17691             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17692             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17693             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17694                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
17695             IsFastSt &&
17696             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17697                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
17698             IsFastLd) {
17699           LastLegalIntegerType = i + 1;
17700           DoIntegerTruncate = true;
17701         }
17702       }
17703     }
17704
17705     // Only use vector types if the vector type is larger than the integer
17706     // type. If they are the same, use integers.
17707     bool UseVectorTy =
17708         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17709     unsigned LastLegalType =
17710         std::max(LastLegalVectorType, LastLegalIntegerType);
17711
17712     // We add +1 here because the LastXXX variables refer to location while
17713     // the NumElem refers to array/index size.
17714     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17715     NumElem = std::min(LastLegalType, NumElem);
17716     Align FirstLoadAlign = FirstLoad->getAlign();
17717
17718     if (NumElem < 2) {
17719       // We know that candidate stores are in order and of correct
17720       // shape. While there is no mergeable sequence from the
17721       // beginning one may start later in the sequence. The only
17722       // reason a merge of size N could have failed where another of
17723       // the same size would not have is if the alignment or either
17724       // the load or store has improved. Drop as many candidates as we
17725       // can here.
17726       unsigned NumSkip = 1;
17727       while ((NumSkip < LoadNodes.size()) &&
17728              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17729              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17730         NumSkip++;
17731       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17732       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17733       NumConsecutiveStores -= NumSkip;
17734       continue;
17735     }
17736
17737     // Check that we can merge these candidates without causing a cycle.
17738     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17739                                                   RootNode)) {
17740       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17741       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17742       NumConsecutiveStores -= NumElem;
17743       continue;
17744     }
17745
17746     // Find if it is better to use vectors or integers to load and store
17747     // to memory.
17748     EVT JointMemOpVT;
17749     if (UseVectorTy) {
17750       // Find a legal type for the vector store.
17751       unsigned Elts = NumElem * NumMemElts;
17752       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17753     } else {
17754       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17755       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17756     }
17757
17758     SDLoc LoadDL(LoadNodes[0].MemNode);
17759     SDLoc StoreDL(StoreNodes[0].MemNode);
17760
17761     // The merged loads are required to have the same incoming chain, so
17762     // using the first's chain is acceptable.
17763
17764     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17765     AddToWorklist(NewStoreChain.getNode());
17766
17767     MachineMemOperand::Flags LdMMOFlags =
17768         isDereferenceable ? MachineMemOperand::MODereferenceable
17769                           : MachineMemOperand::MONone;
17770     if (IsNonTemporalLoad)
17771       LdMMOFlags |= MachineMemOperand::MONonTemporal;
17772
17773     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17774                                               ? MachineMemOperand::MONonTemporal
17775                                               : MachineMemOperand::MONone;
17776
17777     SDValue NewLoad, NewStore;
17778     if (UseVectorTy || !DoIntegerTruncate) {
17779       NewLoad = DAG.getLoad(
17780           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17781           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17782       SDValue StoreOp = NewLoad;
17783       if (NeedRotate) {
17784         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17785         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17786                "Unexpected type for rotate-able load pair");
17787         SDValue RotAmt =
17788             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17789         // Target can convert to the identical ROTR if it does not have ROTL.
17790         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17791       }
17792       NewStore = DAG.getStore(
17793           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17794           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17795     } else { // This must be the truncstore/extload case
17796       EVT ExtendedTy =
17797           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17798       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17799                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
17800                                FirstLoad->getPointerInfo(), JointMemOpVT,
17801                                FirstLoadAlign, LdMMOFlags);
17802       NewStore = DAG.getTruncStore(
17803           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17804           FirstInChain->getPointerInfo(), JointMemOpVT,
17805           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17806     }
17807
17808     // Transfer chain users from old loads to the new load.
17809     for (unsigned i = 0; i < NumElem; ++i) {
17810       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17811       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17812                                     SDValue(NewLoad.getNode(), 1));
17813     }
17814
17815     // Replace all stores with the new store. Recursively remove corresponding
17816     // values if they are no longer used.
17817     for (unsigned i = 0; i < NumElem; ++i) {
17818       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17819       CombineTo(StoreNodes[i].MemNode, NewStore);
17820       if (Val.getNode()->use_empty())
17821         recursivelyDeleteUnusedNodes(Val.getNode());
17822     }
17823
17824     MadeChange = true;
17825     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17826     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17827     NumConsecutiveStores -= NumElem;
17828   }
17829   return MadeChange;
17830 }
17831
17832 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17833   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17834     return false;
17835
17836   // TODO: Extend this function to merge stores of scalable vectors.
17837   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17838   // store since we know <vscale x 16 x i8> is exactly twice as large as
17839   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17840   EVT MemVT = St->getMemoryVT();
17841   if (MemVT.isScalableVector())
17842     return false;
17843   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17844     return false;
17845
17846   // This function cannot currently deal with non-byte-sized memory sizes.
17847   int64_t ElementSizeBytes = MemVT.getStoreSize();
17848   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17849     return false;
17850
17851   // Do not bother looking at stored values that are not constants, loads, or
17852   // extracted vector elements.
17853   SDValue StoredVal = peekThroughBitcasts(St->getValue());
17854   const StoreSource StoreSrc = getStoreSource(StoredVal);
17855   if (StoreSrc == StoreSource::Unknown)
17856     return false;
17857
17858   SmallVector<MemOpLink, 8> StoreNodes;
17859   SDNode *RootNode;
17860   // Find potential store merge candidates by searching through chain sub-DAG
17861   getStoreMergeCandidates(St, StoreNodes, RootNode);
17862
17863   // Check if there is anything to merge.
17864   if (StoreNodes.size() < 2)
17865     return false;
17866
17867   // Sort the memory operands according to their distance from the
17868   // base pointer.
17869   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17870     return LHS.OffsetFromBase < RHS.OffsetFromBase;
17871   });
17872
17873   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17874       Attribute::NoImplicitFloat);
17875   bool IsNonTemporalStore = St->isNonTemporal();
17876   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17877                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
17878
17879   // Store Merge attempts to merge the lowest stores. This generally
17880   // works out as if successful, as the remaining stores are checked
17881   // after the first collection of stores is merged. However, in the
17882   // case that a non-mergeable store is found first, e.g., {p[-2],
17883   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
17884   // mergeable cases. To prevent this, we prune such stores from the
17885   // front of StoreNodes here.
17886   bool MadeChange = false;
17887   while (StoreNodes.size() > 1) {
17888     unsigned NumConsecutiveStores =
17889         getConsecutiveStores(StoreNodes, ElementSizeBytes);
17890     // There are no more stores in the list to examine.
17891     if (NumConsecutiveStores == 0)
17892       return MadeChange;
17893
17894     // We have at least 2 consecutive stores. Try to merge them.
17895     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17896     switch (StoreSrc) {
17897     case StoreSource::Constant:
17898       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17899                                              MemVT, RootNode, AllowVectors);
17900       break;
17901
17902     case StoreSource::Extract:
17903       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17904                                             MemVT, RootNode);
17905       break;
17906
17907     case StoreSource::Load:
17908       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17909                                          MemVT, RootNode, AllowVectors,
17910                                          IsNonTemporalStore, IsNonTemporalLoad);
17911       break;
17912
17913     default:
17914       llvm_unreachable("Unhandled store source type");
17915     }
17916   }
17917   return MadeChange;
17918 }
17919
17920 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17921   SDLoc SL(ST);
17922   SDValue ReplStore;
17923
17924   // Replace the chain to avoid dependency.
17925   if (ST->isTruncatingStore()) {
17926     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17927                                   ST->getBasePtr(), ST->getMemoryVT(),
17928                                   ST->getMemOperand());
17929   } else {
17930     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17931                              ST->getMemOperand());
17932   }
17933
17934   // Create token to keep both nodes around.
17935   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17936                               MVT::Other, ST->getChain(), ReplStore);
17937
17938   // Make sure the new and old chains are cleaned up.
17939   AddToWorklist(Token.getNode());
17940
17941   // Don't add users to work list.
17942   return CombineTo(ST, Token, false);
17943 }
17944
17945 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17946   SDValue Value = ST->getValue();
17947   if (Value.getOpcode() == ISD::TargetConstantFP)
17948     return SDValue();
17949
17950   if (!ISD::isNormalStore(ST))
17951     return SDValue();
17952
17953   SDLoc DL(ST);
17954
17955   SDValue Chain = ST->getChain();
17956   SDValue Ptr = ST->getBasePtr();
17957
17958   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17959
17960   // NOTE: If the original store is volatile, this transform must not increase
17961   // the number of stores.  For example, on x86-32 an f64 can be stored in one
17962   // processor operation but an i64 (which is not legal) requires two.  So the
17963   // transform should not be done in this case.
17964
17965   SDValue Tmp;
17966   switch (CFP->getSimpleValueType(0).SimpleTy) {
17967   default:
17968     llvm_unreachable("Unknown FP type");
17969   case MVT::f16:    // We don't do this for these yet.
17970   case MVT::f80:
17971   case MVT::f128:
17972   case MVT::ppcf128:
17973     return SDValue();
17974   case MVT::f32:
17975     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17976         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17977       ;
17978       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17979                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17980                             MVT::i32);
17981       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17982     }
17983
17984     return SDValue();
17985   case MVT::f64:
17986     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17987          ST->isSimple()) ||
17988         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17989       ;
17990       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17991                             getZExtValue(), SDLoc(CFP), MVT::i64);
17992       return DAG.getStore(Chain, DL, Tmp,
17993                           Ptr, ST->getMemOperand());
17994     }
17995
17996     if (ST->isSimple() &&
17997         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17998       // Many FP stores are not made apparent until after legalize, e.g. for
17999       // argument passing.  Since this is so common, custom legalize the
18000       // 64-bit integer store into two 32-bit stores.
18001       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18002       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18003       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18004       if (DAG.getDataLayout().isBigEndian())
18005         std::swap(Lo, Hi);
18006
18007       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18008       AAMDNodes AAInfo = ST->getAAInfo();
18009
18010       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18011                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18012       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18013       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18014                                  ST->getPointerInfo().getWithOffset(4),
18015                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18016       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18017                          St0, St1);
18018     }
18019
18020     return SDValue();
18021   }
18022 }
18023
18024 SDValue DAGCombiner::visitSTORE(SDNode *N) {
        // Tries the store folds below in order; the first one that applies
        // returns. SDValue(N, 0) is returned by folds that already updated the
        // DAG themselves, and the final fallback is ReduceLoadOpStoreWidth.
18025   StoreSDNode *ST  = cast<StoreSDNode>(N);
18026   SDValue Chain = ST->getChain();
18027   SDValue Value = ST->getValue();
18028   SDValue Ptr   = ST->getBasePtr();
18029
18030   // If this is a store of a bit convert, store the input value if the
18031   // resultant store does not need a higher alignment than the original.
18032   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18033       ST->isUnindexed()) {
18034     EVT SVT = Value.getOperand(0).getValueType();
18035     // If the store is volatile, we only want to change the store type if the
18036     // resulting store is legal. Otherwise we might increase the number of
18037     // memory accesses. We don't care if the original type was legal or not
18038     // as we assume software couldn't rely on the number of accesses of an
18039     // illegal type.
18040     // TODO: May be able to relax for unordered atomics (see D66309)
18041     if (((!LegalOperations && ST->isSimple()) ||
18042          TLI.isOperationLegal(ISD::STORE, SVT)) &&
18043         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18044                                      DAG, *ST->getMemOperand())) {
18045       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18046                           ST->getMemOperand());
18047     }
18048   }
18049
18050   // Turn 'store undef, Ptr' -> nothing (only the incoming chain remains).
18051   if (Value.isUndef() && ST->isUnindexed())
18052     return Chain;
18053
18054   // Try to infer better alignment information than the store already has.
        // Skipped at CodeGenOpt::None and for indexed or atomic stores.
18055   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18056     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18057       if (*Alignment > ST->getAlign() &&
18058           isAligned(*Alignment, ST->getSrcValueOffset())) {
18059         SDValue NewStore =
18060             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18061                               ST->getMemoryVT(), *Alignment,
18062                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
18063         // NewStore will always be N as we are only refining the alignment
18064         assert(NewStore.getNode() == N);
18065         (void)NewStore;
18066       }
18067     }
18068   }
18069
18070   // Try transforming a pair floating point load / store ops to integer
18071   // load / store ops.
18072   if (SDValue NewST = TransformFPLoadStorePair(N))
18073     return NewST;
18074
18075   // Try transforming several stores into STORE (BSWAP).
18076   if (SDValue Store = mergeTruncStores(ST))
18077     return Store;
18078
18079   if (ST->isUnindexed()) {
18080     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18081     // adjacent stores.
18082     if (findBetterNeighborChains(ST)) {
18083       // replaceStoreChain uses CombineTo, which handled all of the worklist
18084       // manipulation. Return the original node to not do anything else.
18085       return SDValue(ST, 0);
18086     }
          // Re-read the chain operand so the folds below use its current value.
18087     Chain = ST->getChain();
18088   }
18089
18090   // FIXME: is there such a thing as a truncating indexed store?
18091   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18092       Value.getValueType().isInteger() &&
18093       (!isa<ConstantSDNode>(Value) ||
18094        !cast<ConstantSDNode>(Value)->isOpaque())) {
          // Only the low MemoryVT-sized bits of each scalar reach memory.
18095     APInt TruncDemandedBits =
18096         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18097                              ST->getMemoryVT().getScalarSizeInBits());
18098
18099     // See if we can simplify the input to this truncstore with knowledge that
18100     // only the low bits are being used.  For example:
18101     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18102     AddToWorklist(Value.getNode());
18103     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18104       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18105                                ST->getMemOperand());
18106
18107     // Otherwise, see if we can simplify the operation with
18108     // SimplifyDemandedBits, which only works if the value has a single use.
18109     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
18110       // Re-visit the store if anything changed and the store hasn't been
18111       // merged with another node (in which case N is deleted).
18112       // SimplifyDemandedBits will add Value's node back to the worklist if
18113       // necessary, but we also need to re-visit the Store node itself.
18114       if (N->getOpcode() != ISD::DELETED_NODE)
18115         AddToWorklist(N);
18116       return SDValue(N, 0);
18117     }
18118   }
18119
18120   // If this is a load followed by a store to the same location, then the store
18121   // is dead/noop.
18122   // TODO: Can relax for unordered atomics (see D66309)
18123   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18124     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18125         ST->isUnindexed() && ST->isSimple() &&
18126         Ld->getAddressSpace() == ST->getAddressSpace() &&
18127         // There can't be any side effects between the load and store, such as
18128         // a call or store.
18129         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18130       // The store is dead, remove it.
18131       return Chain;
18132     }
18133   }
18134
        // Folds that look at a store (ST1) immediately preceding this one on the
        // chain.
18135   // TODO: Can relax for unordered atomics (see D66309)
18136   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18137     if (ST->isUnindexed() && ST->isSimple() &&
18138         ST1->isUnindexed() && ST1->isSimple()) {
18139       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
18140           ST->getMemoryVT() == ST1->getMemoryVT() &&
18141           ST->getAddressSpace() == ST1->getAddressSpace()) {
18142         // If this is a store followed by a store with the same value to the
18143         // same location, then the store is dead/noop.
18144         return Chain;
18145       }
18146
18147       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18148           !ST1->getBasePtr().isUndef() &&
18149           // BaseIndexOffset and the code below requires knowing the size
18150           // of a vector, so bail out if MemoryVT is scalable.
18151           !ST->getMemoryVT().isScalableVector() &&
18152           !ST1->getMemoryVT().isScalableVector() &&
18153           ST->getAddressSpace() == ST1->getAddressSpace()) {
18154         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18155         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18156         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18157         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
18158         // If the preceding store (ST1) writes a subset of the location written
18159         // by this store, and no other node is chained to ST1, we can
18160         // effectively drop ST1. Do not remove stores to undef as they may
18161         // be used as data sinks.
18162         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18163           CombineTo(ST1, ST1->getChain());
18164           return SDValue();
18165         }
18166       }
18167     }
18168   }
18169
18170   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18171   // truncating store.  We can do this even if this is already a truncstore.
18172   if ((Value.getOpcode() == ISD::FP_ROUND ||
18173        Value.getOpcode() == ISD::TRUNCATE) &&
18174       Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18175       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18176                                ST->getMemoryVT(), LegalOperations)) {
18177     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18178                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
18179   }
18180
18181   // Always perform this optimization before types are legal. If the target
18182   // prefers, also try this after legalization to catch stores that were created
18183   // by intrinsics or other nodes.
18184   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18185     while (true) {
18186       // There can be multiple store sequences on the same chain.
18187       // Keep trying to merge store sequences until we are unable to do so
18188       // or until we merge the last store on the chain.
18189       bool Changed = mergeConsecutiveStores(ST);
18190       if (!Changed) break;
18191       // Return N as merge only uses CombineTo and no worklist clean
18192       // up is necessary.
18193       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18194         return SDValue(N, 0);
18195     }
18196   }
18197
18198   // Try transforming N to an indexed store.
18199   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18200     return SDValue(N, 0);
18201
18202   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
18203   //
18204   // Make sure to do this only after attempting to merge stores in order to
18205   //  avoid changing the types of some subset of stores due to visit order,
18206   //  preventing their merging.
18207   if (isa<ConstantFPSDNode>(ST->getValue())) {
18208     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18209       return NewSt;
18210   }
18211
        // Try splitting a store of two merged half-width values into two stores.
18212   if (SDValue NewSt = splitMergedValStore(ST))
18213     return NewSt;
18214
18215   return ReduceLoadOpStoreWidth(N);
18216 }
18217
18218 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
        // Removes a store that writes purely within the object whose lifetime
        // ends at N. Candidate stores are found by walking up the chain from N
        // through values that have a single use. Returns SDValue(N, 0) after
        // removing one store, or an empty SDValue if nothing was removed.
18219   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
        // The containment check below needs the lifetime marker's offset.
18220   if (!LifetimeEnd->hasOffset())
18221     return SDValue();
18222
18223   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18224                                         LifetimeEnd->getOffset(), false);
18225
18226   // We walk up the chains to find stores.
18227   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18228   while (!Chains.empty()) {
18229     SDValue Chain = Chains.pop_back_val();
          // Do not look through a chain value that has other users.
18230     if (!Chain.hasOneUse())
18231       continue;
18232     switch (Chain.getOpcode()) {
18233     case ISD::TokenFactor:
            // Queue every operand of the token factor for inspection.
18234       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18235         Chains.push_back(Chain.getOperand(--Nops));
18236       break;
18237     case ISD::LIFETIME_START:
18238     case ISD::LIFETIME_END:
18239       // We can forward past any lifetime start/end that can be proven not to
18240       // alias the node.
18241       if (!isAlias(Chain.getNode(), N))
18242         Chains.push_back(Chain.getOperand(0));
18243       break;
18244     case ISD::STORE: {
            // ST is dereferenced unconditionally below, so use the asserting
            // cast<> instead of an unchecked dyn_cast<>.
18245       StoreSDNode *ST = cast<StoreSDNode>(Chain);
18246       // TODO: Can relax for unordered atomics (see D66309)
18247       if (!ST->isSimple() || ST->isIndexed())
18248         continue;
18249       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18250       // The bounds of a scalable store are not known until runtime, so this
18251       // store cannot be elided.
18252       if (StoreSize.isScalable())
18253         continue;
18254       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18255       // If we store purely within object bounds just before its lifetime ends,
18256       // we can remove the store.
18257       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18258                                    StoreSize.getFixedSize() * 8)) {
18259         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18260                    dbgs() << "\nwithin LIFETIME_END of : ";
18261                    LifetimeEndBase.dump(); dbgs() << "\n");
18262         CombineTo(ST, ST->getChain());
18263         return SDValue(N, 0);
18264       }
18265     }
18266     }
18267   }
18268   return SDValue();
18269 }
18270
18271 /// For the instruction sequence of store below, F and I values
18272 /// are bundled together as an i64 value before being stored into memory.
18273 /// Sometimes it is more efficient to generate separate stores for F and I,
18274 /// which can remove the bitwise instructions or sink them to colder places.
18275 ///
18276 ///   (store (or (zext (bitcast F to i32) to i64),
18277 ///              (shl (zext I to i64), 32)), addr)  -->
18278 ///   (store F, addr) and (store I, addr+4)
18279 ///
18280 /// Similarly, splitting other merged stores can also be beneficial, like:
18281 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18282 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18283 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18284 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18285 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18286 ///
18287 /// We allow each target to determine specifically which kind of splitting is
18288 /// supported.
18289 ///
18290 /// The store patterns are commonly seen from the simple code snippet below
18291 /// if std::make_pair(...) is SROA-transformed before it is inlined into hoo.
18292 ///   void goo(const std::pair<int, float> &);
18293 ///   hoo() {
18294 ///     ...
18295 ///     goo(std::make_pair(tmp, ftmp));
18296 ///     ...
18297 ///   }
18298 ///
      /// Returns the high-half store (which is chained after the low-half
      /// store), or an empty SDValue if the store is not split.
18299 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18300   if (OptLevel == CodeGenOpt::None)
18301     return SDValue();
18302
18303   // Can't change the number of memory accesses for a volatile store or break
18304   // atomicity for an atomic one.
18305   if (!ST->isSimple())
18306     return SDValue();
18307
18308   SDValue Val = ST->getValue();
18309   SDLoc DL(ST);
18310
18311   // Match OR operand.
18312   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18313     return SDValue();
18314
18315   // Match SHL operand and get Lower and Higher parts of Val.
18316   SDValue Op1 = Val.getOperand(0);
18317   SDValue Op2 = Val.getOperand(1);
18318   SDValue Lo, Hi;
        // The SHL may be either operand of the OR; normalize it into Op1.
18319   if (Op1.getOpcode() != ISD::SHL) {
18320     std::swap(Op1, Op2);
18321     if (Op1.getOpcode() != ISD::SHL)
18322       return SDValue();
18323   }
18324   Lo = Op2;
18325   Hi = Op1.getOperand(0);
18326   if (!Op1.hasOneUse())
18327     return SDValue();
18328
18329   // Match shift amount to HalfValBitSize.
18330   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18331   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18332   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18333     return SDValue();
18334
18335   // Lo and Hi must each be a single-use zero-extension of a scalar integer
18336   // that is no wider than HalfValBitSize.
18337   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18338       !Lo.getOperand(0).getValueType().isScalarInteger() ||
18339       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18340       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18341       !Hi.getOperand(0).getValueType().isScalarInteger() ||
18342       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18343     return SDValue();
18344
18345   // Use the EVT of low and high parts before bitcast as the input
18346   // of target query.
        // NOTE(review): when the zext operand is a BITCAST, the type taken here is
        // the BITCAST's own result type (already checked to be a scalar integer
        // above), not the type of the value feeding the BITCAST - confirm this
        // matches the intent stated in the comment above.
18347   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18348                   ? Lo.getOperand(0).getValueType()
18349                   : Lo.getValueType();
18350   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18351                    ? Hi.getOperand(0).getValueType()
18352                    : Hi.getValueType();
18353   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18354     return SDValue();
18355
18356   // Start to split store.
18357   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18358   AAMDNodes AAInfo = ST->getAAInfo();
18359
18360   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18361   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18362   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18363   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18364
18365   SDValue Chain = ST->getChain();
18366   SDValue Ptr = ST->getBasePtr();
18367   // Lower value store.
18368   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18369                              ST->getOriginalAlign(), MMOFlags, AAInfo);
18370   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18371   // Higher value store, HalfValBitSize / 8 bytes further on and chained
        // after the lower value store.
18372   SDValue St1 = DAG.getStore(
18373       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18374       ST->getOriginalAlign(), MMOFlags, AAInfo);
18375   return St1;
18376 }
18377
18378 /// Convert a disguised subvector insertion into a shuffle:
      /// \p N must be an INSERT_VECTOR_ELT node and \p InsIndex the constant
      /// element index it inserts at. Returns the replacement value, or an empty
      /// SDValue if neither pattern below applies.
18379 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
18380   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
18381          "Expected insert_vector_elt");
18382   SDValue InsertVal = N->getOperand(1);
18383   SDValue Vec = N->getOperand(0);
18384
18385   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18386   // InsIndex)
18387   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
18388   //   CONCAT_VECTORS.
18389   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18390       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18391       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18392     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18393     ArrayRef<int> Mask = SVN->getMask();
18394
18395     SDValue X = Vec.getOperand(0);
18396     SDValue Y = Vec.getOperand(1);
18397
18398     // Vec's operand 0 is using indices from 0 to N-1 and
18399     // operand 1 from N to 2N - 1, where N is the number of
18400     // elements in the vectors.
18401     SDValue InsertVal0 = InsertVal.getOperand(0);
          // -1 means the source of the extract was not found among the inputs.
18402     int ElementOffset = -1;
18403
18404     // We explore the inputs of the shuffle in order to see if we find the
18405     // source of the extract_vector_elt. If so, we can use it to modify the
18406     // shuffle rather than perform an insert_vector_elt.
18407     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
          // Each entry pairs a shuffle-mask index offset with the value found
          // there. X is pushed last so that it is examined first.
18408     ArgWorkList.emplace_back(Mask.size(), Y);
18409     ArgWorkList.emplace_back(0, X);
18410
18411     while (!ArgWorkList.empty()) {
18412       int ArgOffset;
18413       SDValue ArgVal;
18414       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18415
18416       if (ArgVal == InsertVal0) {
18417         ElementOffset = ArgOffset;
18418         break;
18419       }
18420
18421       // Peek through concat_vector.
18422       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18423         int CurrentArgOffset =
18424             ArgOffset + ArgVal.getValueType().getVectorNumElements();
18425         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
              // Walk the operands back to front, giving each its own offset.
18426         for (SDValue Op : reverse(ArgVal->ops())) {
18427           CurrentArgOffset -= Step;
18428           ArgWorkList.emplace_back(CurrentArgOffset, Op);
18429         }
18430
18431         // Make sure we went through all the elements and did not screw up index
18432         // computation.
18433         assert(CurrentArgOffset == ArgOffset);
18434       }
18435     }
18436
18437     if (ElementOffset != -1) {
18438       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18439
            // Redirect the inserted lane to the element the extract was reading.
18440       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18441       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18442       assert(NewMask[InsIndex] <
18443                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18444              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18445
18446       SDValue LegalShuffle =
18447               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18448                                           Y, NewMask, DAG);
18449       if (LegalShuffle)
18450         return LegalShuffle;
18451     }
18452   }
18453
18454   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18455   // bitcast(shuffle (bitcast V), (extended X), Mask)
18456   // Note: We do not use an insert_subvector node because that requires a
18457   // legal subvector type.
18458   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18459       !InsertVal.getOperand(0).getValueType().isVector())
18460     return SDValue();
18461
18462   SDValue SubVec = InsertVal.getOperand(0);
18463   SDValue DestVec = N->getOperand(0);
18464   EVT SubVecVT = SubVec.getValueType();
18465   EVT VT = DestVec.getValueType();
18466   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
18467   // If the source only has a single vector element, the cost of adding it
18468   // to a vector is likely to exceed the cost of an insert_vector_elt.
18469   if (NumSrcElts == 1)
18470     return SDValue();
18471   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18472   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18473
18474   // Step 1: Create a shuffle mask that implements this insert operation. The
18475   // vector that we are inserting into will be operand 0 of the shuffle, so
18476   // those elements are just 'i'. The inserted subvector is in the first
18477   // positions of operand 1 of the shuffle. Example:
18478   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18479   SmallVector<int, 16> Mask(NumMaskVals);
18480   for (unsigned i = 0; i != NumMaskVals; ++i) {
18481     if (i / NumSrcElts == InsIndex)
18482       Mask[i] = (i % NumSrcElts) + NumMaskVals;
18483     else
18484       Mask[i] = i;
18485   }
18486
18487   // Bail out if the target can not handle the shuffle we want to create.
18488   EVT SubVecEltVT = SubVecVT.getVectorElementType();
18489   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18490   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18491     return SDValue();
18492
18493   // Step 2: Create a wide vector from the inserted source vector by appending
18494   // undefined elements. This is the same size as our destination vector.
18495   SDLoc DL(N);
18496   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18497   ConcatOps[0] = SubVec;
18498   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18499
18500   // Step 3: Shuffle in the padded subvector.
18501   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18502   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18503   AddToWorklist(PaddedSubV.getNode());
18504   AddToWorklist(DestVecBC.getNode());
18505   AddToWorklist(Shuf.getNode());
18506   return DAG.getBitcast(VT, Shuf);
18507 }
18508
18509 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
        // Operands are (vector, inserted value, element index). The folds below
        // are tried in order; an empty SDValue is returned when none applies.
18510   SDValue InVec = N->getOperand(0);
18511   SDValue InVal = N->getOperand(1);
18512   SDValue EltNo = N->getOperand(2);
18513   SDLoc DL(N);
18514
18515   EVT VT = InVec.getValueType();
        // Null when the element index is not a constant.
18516   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18517
18518   // Insert into out-of-bounds element is undefined.
18519   if (IndexC && VT.isFixedLengthVector() &&
18520       IndexC->getZExtValue() >= VT.getVectorNumElements())
18521     return DAG.getUNDEF(VT);
18522
18523   // Remove redundant insertions:
18524   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18525   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18526       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18527     return InVec;
18528
18529   if (!IndexC) {
18530     // If this is variable insert to undef vector, it might be better to splat:
18531     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18532     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18533       if (VT.isScalableVector())
18534         return DAG.getSplatVector(VT, DL, InVal);
18535       else {
18536         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18537         return DAG.getBuildVector(VT, DL, Ops);
18538       }
18539     }
18540     return SDValue();
18541   }
18542
        // The remaining folds query the element count, so skip scalable vectors.
18543   if (VT.isScalableVector())
18544     return SDValue();
18545
18546   unsigned NumElts = VT.getVectorNumElements();
18547
18548   // We must know which element is being inserted for folds below here.
18549   unsigned Elt = IndexC->getZExtValue();
18550   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18551     return Shuf;
18552
18553   // Canonicalize insert_vector_elt dag nodes.
18554   // Example:
18555   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
18556   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
18557   //
18558   // Do this only if the child insert_vector node has one use; also
18559   // do this only if indices are both constants and Idx1 < Idx0.
18560   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18561       && isa<ConstantSDNode>(InVec.getOperand(2))) {
18562     unsigned OtherElt = InVec.getConstantOperandVal(2);
18563     if (Elt < OtherElt) {
18564       // Swap nodes.
18565       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18566                                   InVec.getOperand(0), InVal, EltNo);
18567       AddToWorklist(NewOp.getNode());
18568       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18569                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18570     }
18571   }
18572
18573   // If we can't generate a legal BUILD_VECTOR, exit
18574   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18575     return SDValue();
18576
18577   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18578   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
18579   // vector elements.
18580   SmallVector<SDValue, 8> Ops;
18581   // Do not combine these two vectors if the output vector will not replace
18582   // the input vector.
18583   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18584     Ops.append(InVec.getNode()->op_begin(),
18585                InVec.getNode()->op_end());
18586   } else if (InVec.isUndef()) {
18587     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18588   } else {
18589     return SDValue();
18590   }
18591   assert(Ops.size() == NumElts && "Unexpected vector size");
18592
18593   // Insert the element
18594   if (Elt < Ops.size()) {
18595     // All the operands of BUILD_VECTOR must have the same type;
18596     // we enforce that here.
18597     EVT OpVT = Ops[0].getValueType();
18598     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18599   }
18600
18601   // Return the new vector
18602   return DAG.getBuildVector(VT, DL, Ops);
18603 }
18604
18605 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18606                                                   SDValue EltNo,
18607                                                   LoadSDNode *OriginalLoad) {
        // Replaces the extract node EVE, which reads element EltNo of a vector of
        // type InVecVT produced by OriginalLoad, with a scalar load of just that
        // element. On success the uses of EVE and of OriginalLoad's chain result
        // are rewritten in place and SDValue(EVE, 0) is returned; otherwise an
        // empty SDValue is returned. OriginalLoad must be a simple load.
18608   assert(OriginalLoad->isSimple());
18609
18610   EVT ResultVT = EVE->getValueType(0);
18611   EVT VecEltVT = InVecVT.getVectorElementType();
18612
18613   // If the vector element type is not a multiple of a byte then we are unable
18614   // to correctly compute an address to load only the extracted element as a
18615   // scalar.
18616   if (!VecEltVT.isByteSized())
18617     return SDValue();
18618
18619   Align Alignment = OriginalLoad->getAlign();
18620   Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18621       VecEltVT.getTypeForEVT(*DAG.getContext()));
18622
        // Bail out if the element's ABI alignment exceeds the original load's
        // alignment, or if a load of the element type is neither legal nor custom.
18623   if (NewAlign > Alignment ||
18624       !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18625     return SDValue();
18626
        // NOTE(review): this passes NON_EXTLOAD when ResultVT is wider than
        // VecEltVT, yet that is exactly the case in which an extending load is
        // emitted further down - the two arms look swapped; confirm the intent.
18627   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18628     ISD::NON_EXTLOAD : ISD::EXTLOAD;
18629   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18630     return SDValue();
18631
18632   Alignment = NewAlign;
18633
18634   MachinePointerInfo MPI;
18635   SDLoc DL(EVE);
18636   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
          // Constant index: offset the pointer info by the element's byte offset.
18637     int Elt = ConstEltNo->getZExtValue();
18638     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18639     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18640   } else {
18641     // Discard the pointer info except the address space because the memory
18642     // operand can't represent this new access since the offset is variable.
18643     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18644   }
18645   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
18646                                                InVecVT, EltNo);
18647
18648   // The replacement we need to do here is a little tricky: we need to
18649   // replace an extractelement of a load with a load.
18650   // Use ReplaceAllUsesOfValuesWith to do the replacement.
18651   // Note that this replacement assumes that the extract_vector_elt is the
18652   // only use of the load; that's okay because we don't want to perform this
18653   // transformation in other cases anyway.
18654   SDValue Load;
18655   SDValue Chain;
18656   if (ResultVT.bitsGT(VecEltVT)) {
18657     // If the result type of vextract is wider than the load, then issue an
18658     // extending load instead.
18659     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18660                                                   VecEltVT)
18661                                    ? ISD::ZEXTLOAD
18662                                    : ISD::EXTLOAD;
18663     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18664                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18665                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
18666                           OriginalLoad->getAAInfo());
18667     Chain = Load.getValue(1);
18668   } else {
          // Plain element load, then truncate or bitcast to the result type.
18669     Load = DAG.getLoad(
18670         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18671         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18672     Chain = Load.getValue(1);
18673     if (ResultVT.bitsLT(VecEltVT))
18674       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18675     else
18676       Load = DAG.getBitcast(ResultVT, Load);
18677   }
18678   WorklistRemover DeadNodes(*this);
        // Replace both the extracted value and the original load's chain result.
18679   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18680   SDValue To[] = { Load, Chain };
18681   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18682   // Make sure to revisit this node to clean it up; it will usually be dead.
18683   AddToWorklist(EVE);
18684   // Since we're explicitly calling ReplaceAllUses, add the new node to the
18685   // worklist explicitly as well.
18686   AddToWorklistWithUsers(Load.getNode());
18687   ++OpsNarrowed;
18688   return SDValue(EVE, 0);
18689 }
18690
18691 /// Sink an extract_vector_elt through a single-use vector binop that has a
18692 /// constant operand, so that the math/logic is done on scalars instead.
18693 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18694                                        bool LegalOperations) {
18695   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18696   SDValue BinOp = ExtElt->getOperand(0);
18697   SDValue Idx = ExtElt->getOperand(1);
18698
18699   // Require a constant index into a one-use binop producing a single value.
18700   if (!isa<ConstantSDNode>(Idx) || !TLI.isBinOp(BinOp.getOpcode()))
18701     return SDValue();
18702   if (!BinOp.hasOneUse() || BinOp.getNode()->getNumValues() != 1)
18703     return SDValue();
18704
18705   // The target may veto this to avoid an expensive register transfer.
18706   if (!TLI.shouldScalarizeBinop(BinOp))
18707     return SDValue();
18708
18709   // An extract from a vector constant is constant-folded, so with a constant
18710   // operand this only trades the vector op for a scalar op and moves the
18711   // extract. Without a constant operand there is nothing to do.
18712   SDValue LHS = BinOp.getOperand(0);
18713   SDValue RHS = BinOp.getOperand(1);
18714   if (!isAnyConstantBuildVector(LHS, true) &&
18715       !isAnyConstantBuildVector(RHS, true))
18716     return SDValue();
18717
18718   // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18719   // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18720   SDLoc DL(ExtElt);
18721   EVT EltVT = ExtElt->getValueType(0);
18722   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, LHS, Idx);
18723   SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, RHS, Idx);
18724   return DAG.getNode(BinOp.getOpcode(), DL, EltVT, Elt0, Elt1); }
18725
18726 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18727   SDValue VecOp = N->getOperand(0);
18728   SDValue Index = N->getOperand(1);
18729   EVT ScalarVT = N->getValueType(0);
18730   EVT VecVT = VecOp.getValueType();
18731   if (VecOp.isUndef())
18732     return DAG.getUNDEF(ScalarVT);
18733
18734   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18735   //
18736   // This only really matters if the index is non-constant since other combines
18737   // on the constant elements already work.
18738   SDLoc DL(N);
18739   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18740       Index == VecOp.getOperand(2)) {
18741     SDValue Elt = VecOp.getOperand(1);
18742     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18743   }
18744
18745   // (vextract (scalar_to_vector val, 0) -> val
18746   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18747     // Only 0'th element of SCALAR_TO_VECTOR is defined.
18748     if (DAG.isKnownNeverZero(Index))
18749       return DAG.getUNDEF(ScalarVT);
18750
18751     // Check if the result type doesn't match the inserted element type. A
18752     // SCALAR_TO_VECTOR may truncate the inserted element and the
18753     // EXTRACT_VECTOR_ELT may widen the extracted vector.
18754     SDValue InOp = VecOp.getOperand(0);
18755     if (InOp.getValueType() != ScalarVT) {
18756       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18757       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18758     }
18759     return InOp;
18760   }
18761
18762   // extract_vector_elt of out-of-bounds element -> UNDEF
18763   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18764   if (IndexC && VecVT.isFixedLengthVector() &&
18765       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18766     return DAG.getUNDEF(ScalarVT);
18767
18768   // extract_vector_elt (build_vector x, y), 1 -> y
18769   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18770        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18771       TLI.isTypeLegal(VecVT) &&
18772       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18773     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18774             VecVT.isFixedLengthVector()) &&
18775            "BUILD_VECTOR used for scalable vectors");
18776     unsigned IndexVal =
18777         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18778     SDValue Elt = VecOp.getOperand(IndexVal);
18779     EVT InEltVT = Elt.getValueType();
18780
18781     // Sometimes build_vector's scalar input types do not match result type.
18782     if (ScalarVT == InEltVT)
18783       return Elt;
18784
18785     // TODO: It may be useful to truncate if free if the build_vector implicitly
18786     // converts.
18787   }
18788
18789   if (VecVT.isScalableVector())
18790     return SDValue();
18791
18792   // All the code from this point onwards assumes fixed width vectors, but it's
18793   // possible that some of the combinations could be made to work for scalable
18794   // vectors too.
18795   unsigned NumElts = VecVT.getVectorNumElements();
18796   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18797
18798   // TODO: These transforms should not require the 'hasOneUse' restriction, but
18799   // there are regressions on multiple targets without it. We can end up with a
18800   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18801   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18802       VecOp.hasOneUse()) {
18803     // The vector index of the LSBs of the source depend on the endian-ness.
18804     bool IsLE = DAG.getDataLayout().isLittleEndian();
18805     unsigned ExtractIndex = IndexC->getZExtValue();
18806     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18807     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18808     SDValue BCSrc = VecOp.getOperand(0);
18809     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18810       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18811
18812     if (LegalTypes && BCSrc.getValueType().isInteger() &&
18813         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18814       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18815       // trunc i64 X to i32
18816       SDValue X = BCSrc.getOperand(0);
18817       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18818              "Extract element and scalar to vector can't change element type "
18819              "from FP to integer.");
18820       unsigned XBitWidth = X.getValueSizeInBits();
18821       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18822
18823       // An extract element return value type can be wider than its vector
18824       // operand element type. In that case, the high bits are undefined, so
18825       // it's possible that we may need to extend rather than truncate.
18826       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18827         assert(XBitWidth % VecEltBitWidth == 0 &&
18828                "Scalar bitwidth must be a multiple of vector element bitwidth");
18829         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18830       }
18831     }
18832   }
18833
18834   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18835     return BO;
18836
18837   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
18838   // We only perform this optimization before the op legalization phase because
18839   // we may introduce new vector instructions which are not backed by TD
18840   // patterns. For example on AVX, extracting elements from a wide vector
18841   // without using extract_subvector. However, if we can find an underlying
18842   // scalar value, then we can always use that.
18843   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18844     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18845     // Find the new index to extract from.
18846     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18847
18848     // Extracting an undef index is undef.
18849     if (OrigElt == -1)
18850       return DAG.getUNDEF(ScalarVT);
18851
18852     // Select the right vector half to extract from.
18853     SDValue SVInVec;
18854     if (OrigElt < (int)NumElts) {
18855       SVInVec = VecOp.getOperand(0);
18856     } else {
18857       SVInVec = VecOp.getOperand(1);
18858       OrigElt -= NumElts;
18859     }
18860
18861     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18862       SDValue InOp = SVInVec.getOperand(OrigElt);
18863       if (InOp.getValueType() != ScalarVT) {
18864         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18865         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18866       }
18867
18868       return InOp;
18869     }
18870
18871     // FIXME: We should handle recursing on other vector shuffles and
18872     // scalar_to_vector here as well.
18873
18874     if (!LegalOperations ||
18875         // FIXME: Should really be just isOperationLegalOrCustom.
18876         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18877         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18878       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18879                          DAG.getVectorIdxConstant(OrigElt, DL));
18880     }
18881   }
18882
18883   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18884   // simplify it based on the (valid) extraction indices.
18885   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18886         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18887                Use->getOperand(0) == VecOp &&
18888                isa<ConstantSDNode>(Use->getOperand(1));
18889       })) {
18890     APInt DemandedElts = APInt::getNullValue(NumElts);
18891     for (SDNode *Use : VecOp->uses()) {
18892       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18893       if (CstElt->getAPIntValue().ult(NumElts))
18894         DemandedElts.setBit(CstElt->getZExtValue());
18895     }
18896     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18897       // We simplified the vector operand of this extract element. If this
18898       // extract is not dead, visit it again so it is folded properly.
18899       if (N->getOpcode() != ISD::DELETED_NODE)
18900         AddToWorklist(N);
18901       return SDValue(N, 0);
18902     }
18903     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18904     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18905       // We simplified the vector operand of this extract element. If this
18906       // extract is not dead, visit it again so it is folded properly.
18907       if (N->getOpcode() != ISD::DELETED_NODE)
18908         AddToWorklist(N);
18909       return SDValue(N, 0);
18910     }
18911   }
18912
18913   // Everything under here is trying to match an extract of a loaded value.
18914   // If the result of load has to be truncated, then it's not necessarily
18915   // profitable.
18916   bool BCNumEltsChanged = false;
18917   EVT ExtVT = VecVT.getVectorElementType();
18918   EVT LVT = ExtVT;
18919   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18920     return SDValue();
18921
18922   if (VecOp.getOpcode() == ISD::BITCAST) {
18923     // Don't duplicate a load with other uses.
18924     if (!VecOp.hasOneUse())
18925       return SDValue();
18926
18927     EVT BCVT = VecOp.getOperand(0).getValueType();
18928     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18929       return SDValue();
18930     if (NumElts != BCVT.getVectorNumElements())
18931       BCNumEltsChanged = true;
18932     VecOp = VecOp.getOperand(0);
18933     ExtVT = BCVT.getVectorElementType();
18934   }
18935
18936   // extract (vector load $addr), i --> load $addr + i * size
18937   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18938       ISD::isNormalLoad(VecOp.getNode()) &&
18939       !Index->hasPredecessor(VecOp.getNode())) {
18940     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18941     if (VecLoad && VecLoad->isSimple())
18942       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18943   }
18944
18945   // Perform only after legalization to ensure build_vector / vector_shuffle
18946   // optimizations have already been done.
18947   if (!LegalOperations || !IndexC)
18948     return SDValue();
18949
18950   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18951   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18952   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18953   int Elt = IndexC->getZExtValue();
18954   LoadSDNode *LN0 = nullptr;
18955   if (ISD::isNormalLoad(VecOp.getNode())) {
18956     LN0 = cast<LoadSDNode>(VecOp);
18957   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18958              VecOp.getOperand(0).getValueType() == ExtVT &&
18959              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18960     // Don't duplicate a load with other uses.
18961     if (!VecOp.hasOneUse())
18962       return SDValue();
18963
18964     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18965   }
18966   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18967     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18968     // =>
18969     // (load $addr+1*size)
18970
18971     // Don't duplicate a load with other uses.
18972     if (!VecOp.hasOneUse())
18973       return SDValue();
18974
18975     // If the bit convert changed the number of elements, it is unsafe
18976     // to examine the mask.
18977     if (BCNumEltsChanged)
18978       return SDValue();
18979
18980     // Select the input vector, guarding against out of range extract vector.
18981     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18982     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18983
18984     if (VecOp.getOpcode() == ISD::BITCAST) {
18985       // Don't duplicate a load with other uses.
18986       if (!VecOp.hasOneUse())
18987         return SDValue();
18988
18989       VecOp = VecOp.getOperand(0);
18990     }
18991     if (ISD::isNormalLoad(VecOp.getNode())) {
18992       LN0 = cast<LoadSDNode>(VecOp);
18993       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18994       Index = DAG.getConstant(Elt, DL, Index.getValueType());
18995     }
18996   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18997              VecVT.getVectorElementType() == ScalarVT &&
18998              (!LegalTypes ||
18999               TLI.isTypeLegal(
19000                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19001     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19002     //      -> extract_vector_elt a, 0
19003     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19004     //      -> extract_vector_elt a, 1
19005     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19006     //      -> extract_vector_elt b, 0
19007     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19008     //      -> extract_vector_elt b, 1
19009     SDLoc SL(N);
19010     EVT ConcatVT = VecOp.getOperand(0).getValueType();
19011     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19012     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19013                                      Index.getValueType());
19014
19015     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19016     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19017                               ConcatVT.getVectorElementType(),
19018                               ConcatOp, NewIdx);
19019     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19020   }
19021
19022   // Make sure we found a non-volatile load and the extractelement is
19023   // the only use.
19024   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19025     return SDValue();
19026
19027   // If Idx was -1 above, Elt is going to be -1, so just return undef.
19028   if (Elt == -1)
19029     return DAG.getUNDEF(LVT);
19030
19031   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19032 }
19033
19034 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19035 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19036   // We perform this optimization post type-legalization because
19037   // the type-legalizer often scalarizes integer-promoted vectors.
19038   // Performing this optimization before may create bit-casts which
19039   // will be type-legalized to complex code sequences.
19040   // We perform this optimization only before the operation legalizer because we
19041   // may introduce illegal operations.
19042   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19043     return SDValue();
19044
19045   unsigned NumInScalars = N->getNumOperands();
19046   SDLoc DL(N);
19047   EVT VT = N->getValueType(0);
19048
19049   // Check to see if this is a BUILD_VECTOR of a bunch of values
19050   // which come from any_extend or zero_extend nodes. If so, we can create
19051   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19052   // optimizations. We do not handle sign-extend because we can't fill the sign
19053   // using shuffles.
19054   EVT SourceType = MVT::Other;
19055   bool AllAnyExt = true;
19056
19057   for (unsigned i = 0; i != NumInScalars; ++i) {
19058     SDValue In = N->getOperand(i);
19059     // Ignore undef inputs.
19060     if (In.isUndef()) continue;
19061
19062     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
19063     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19064
19065     // Abort if the element is not an extension.
19066     if (!ZeroExt && !AnyExt) {
19067       SourceType = MVT::Other;
19068       break;
19069     }
19070
19071     // The input is a ZeroExt or AnyExt. Check the original type.
19072     EVT InTy = In.getOperand(0).getValueType();
19073
19074     // Check that all of the widened source types are the same.
19075     if (SourceType == MVT::Other)
19076       // First time.
19077       SourceType = InTy;
19078     else if (InTy != SourceType) {
19079       // Multiple income types. Abort.
19080       SourceType = MVT::Other;
19081       break;
19082     }
19083
19084     // Check if all of the extends are ANY_EXTENDs.
19085     AllAnyExt &= AnyExt;
19086   }
19087
19088   // In order to have valid types, all of the inputs must be extended from the
19089   // same source type and all of the inputs must be any or zero extend.
19090   // Scalar sizes must be a power of two.
19091   EVT OutScalarTy = VT.getScalarType();
19092   bool ValidTypes = SourceType != MVT::Other &&
19093                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19094                  isPowerOf2_32(SourceType.getSizeInBits());
19095
19096   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19097   // turn into a single shuffle instruction.
19098   if (!ValidTypes)
19099     return SDValue();
19100
19101   // If we already have a splat buildvector, then don't fold it if it means
19102   // introducing zeros.
19103   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19104     return SDValue();
19105
19106   bool isLE = DAG.getDataLayout().isLittleEndian();
19107   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19108   assert(ElemRatio > 1 && "Invalid element size ratio");
19109   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19110                                DAG.getConstant(0, DL, SourceType);
19111
19112   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19113   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19114
19115   // Populate the new build_vector
19116   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19117     SDValue Cast = N->getOperand(i);
19118     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19119             Cast.getOpcode() == ISD::ZERO_EXTEND ||
19120             Cast.isUndef()) && "Invalid cast opcode");
19121     SDValue In;
19122     if (Cast.isUndef())
19123       In = DAG.getUNDEF(SourceType);
19124     else
19125       In = Cast->getOperand(0);
19126     unsigned Index = isLE ? (i * ElemRatio) :
19127                             (i * ElemRatio + (ElemRatio - 1));
19128
19129     assert(Index < Ops.size() && "Invalid index");
19130     Ops[Index] = In;
19131   }
19132
19133   // The type of the new BUILD_VECTOR node.
19134   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19135   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19136          "Invalid vector size");
19137   // Check if the new vector type is legal.
19138   if (!isTypeLegal(VecVT) ||
19139       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19140        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19141     return SDValue();
19142
19143   // Make the new BUILD_VECTOR.
19144   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19145
19146   // The new BUILD_VECTOR node has the potential to be further optimized.
19147   AddToWorklist(BV.getNode());
19148   // Bitcast to the desired type.
19149   return DAG.getBitcast(VT, BV);
19150 }
19151
19152 // Simplify (build_vec (trunc $1)
19153 //                     (trunc (srl $1 half-width))
19154 //                     (trunc (srl $1 (2 * half-width))) …)
19155 // to (bitcast $1)
19156 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19157   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19158
19159   // Only for little endian
19160   if (!DAG.getDataLayout().isLittleEndian())
19161     return SDValue();
19162
19163   SDLoc DL(N);
19164   EVT VT = N->getValueType(0);
19165   EVT OutScalarTy = VT.getScalarType();
19166   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19167
19168   // Only for power of two types to be sure that bitcast works well
19169   if (!isPowerOf2_64(ScalarTypeBitsize))
19170     return SDValue();
19171
19172   unsigned NumInScalars = N->getNumOperands();
19173
19174   // Look through bitcasts
19175   auto PeekThroughBitcast = [](SDValue Op) {
19176     if (Op.getOpcode() == ISD::BITCAST)
19177       return Op.getOperand(0);
19178     return Op;
19179   };
19180
19181   // The source value where all the parts are extracted.
19182   SDValue Src;
19183   for (unsigned i = 0; i != NumInScalars; ++i) {
19184     SDValue In = PeekThroughBitcast(N->getOperand(i));
19185     // Ignore undef inputs.
19186     if (In.isUndef()) continue;
19187
19188     if (In.getOpcode() != ISD::TRUNCATE)
19189       return SDValue();
19190
19191     In = PeekThroughBitcast(In.getOperand(0));
19192
19193     if (In.getOpcode() != ISD::SRL) {
19194       // For now only build_vec without shuffling, handle shifts here in the
19195       // future.
19196       if (i != 0)
19197         return SDValue();
19198
19199       Src = In;
19200     } else {
19201       // In is SRL
19202       SDValue part = PeekThroughBitcast(In.getOperand(0));
19203
19204       if (!Src) {
19205         Src = part;
19206       } else if (Src != part) {
19207         // Vector parts do not stem from the same variable
19208         return SDValue();
19209       }
19210
19211       SDValue ShiftAmtVal = In.getOperand(1);
19212       if (!isa<ConstantSDNode>(ShiftAmtVal))
19213         return SDValue();
19214
19215       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19216
19217       // The extracted value is not extracted at the right position
19218       if (ShiftAmt != i * ScalarTypeBitsize)
19219         return SDValue();
19220     }
19221   }
19222
19223   // Only cast if the size is the same
19224   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19225     return SDValue();
19226
19227   return DAG.getBitcast(VT, Src);
19228 }
19229
19230 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19231                                            ArrayRef<int> VectorMask,
19232                                            SDValue VecIn1, SDValue VecIn2,
19233                                            unsigned LeftIdx, bool DidSplitVec) {
19234   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19235
19236   EVT VT = N->getValueType(0);
19237   EVT InVT1 = VecIn1.getValueType();
19238   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19239
19240   unsigned NumElems = VT.getVectorNumElements();
19241   unsigned ShuffleNumElems = NumElems;
19242
19243   // If we artificially split a vector in two already, then the offsets in the
19244   // operands will all be based off of VecIn1, even those in VecIn2.
19245   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19246
19247   uint64_t VTSize = VT.getFixedSizeInBits();
19248   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19249   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19250
19251   assert(InVT2Size <= InVT1Size &&
19252          "Inputs must be sorted to be in non-increasing vector size order.");
19253
19254   // We can't generate a shuffle node with mismatched input and output types.
19255   // Try to make the types match the type of the output.
19256   if (InVT1 != VT || InVT2 != VT) {
19257     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19258       // If the output vector length is a multiple of both input lengths,
19259       // we can concatenate them and pad the rest with undefs.
19260       unsigned NumConcats = VTSize / InVT1Size;
19261       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19262       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19263       ConcatOps[0] = VecIn1;
19264       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19265       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19266       VecIn2 = SDValue();
19267     } else if (InVT1Size == VTSize * 2) {
19268       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19269         return SDValue();
19270
19271       if (!VecIn2.getNode()) {
19272         // If we only have one input vector, and it's twice the size of the
19273         // output, split it in two.
19274         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19275                              DAG.getVectorIdxConstant(NumElems, DL));
19276         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19277         // Since we now have shorter input vectors, adjust the offset of the
19278         // second vector's start.
19279         Vec2Offset = NumElems;
19280       } else {
19281         assert(InVT2Size <= InVT1Size &&
19282                "Second input is not going to be larger than the first one.");
19283
19284         // VecIn1 is wider than the output, and we have another, possibly
19285         // smaller input. Pad the smaller input with undefs, shuffle at the
19286         // input vector width, and extract the output.
19287         // The shuffle type is different than VT, so check legality again.
19288         if (LegalOperations &&
19289             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19290           return SDValue();
19291
19292         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19293         // lower it back into a BUILD_VECTOR. So if the inserted type is
19294         // illegal, don't even try.
19295         if (InVT1 != InVT2) {
19296           if (!TLI.isTypeLegal(InVT2))
19297             return SDValue();
19298           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19299                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19300         }
19301         ShuffleNumElems = NumElems * 2;
19302       }
19303     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19304       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19305       ConcatOps[0] = VecIn2;
19306       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19307     } else {
19308       // TODO: Support cases where the length mismatch isn't exactly by a
19309       // factor of 2.
19310       // TODO: Move this check upwards, so that if we have bad type
19311       // mismatches, we don't create any DAG nodes.
19312       return SDValue();
19313     }
19314   }
19315
19316   // Initialize mask to undef.
19317   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19318
19319   // Only need to run up to the number of elements actually used, not the
19320   // total number of elements in the shuffle - if we are shuffling a wider
19321   // vector, the high lanes should be set to undef.
19322   for (unsigned i = 0; i != NumElems; ++i) {
19323     if (VectorMask[i] <= 0)
19324       continue;
19325
19326     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19327     if (VectorMask[i] == (int)LeftIdx) {
19328       Mask[i] = ExtIndex;
19329     } else if (VectorMask[i] == (int)LeftIdx + 1) {
19330       Mask[i] = Vec2Offset + ExtIndex;
19331     }
19332   }
19333
19334   // The type the input vectors may have changed above.
19335   InVT1 = VecIn1.getValueType();
19336
19337   // If we already have a VecIn2, it should have the same type as VecIn1.
19338   // If we don't, get an undef/zero vector of the appropriate type.
19339   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19340   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19341
19342   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19343   if (ShuffleNumElems > NumElems)
19344     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19345
19346   return Shuffle;
19347 }
19348
19349 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19350   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19351
19352   // First, determine where the build vector is not undef.
19353   // TODO: We could extend this to handle zero elements as well as undefs.
19354   int NumBVOps = BV->getNumOperands();
19355   int ZextElt = -1;
19356   for (int i = 0; i != NumBVOps; ++i) {
19357     SDValue Op = BV->getOperand(i);
19358     if (Op.isUndef())
19359       continue;
19360     if (ZextElt == -1)
19361       ZextElt = i;
19362     else
19363       return SDValue();
19364   }
19365   // Bail out if there's no non-undef element.
19366   if (ZextElt == -1)
19367     return SDValue();
19368
19369   // The build vector contains some number of undef elements and exactly
19370   // one other element. That other element must be a zero-extended scalar
19371   // extracted from a vector at a constant index to turn this into a shuffle.
19372   // Also, require that the build vector does not implicitly truncate/extend
19373   // its elements.
19374   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19375   EVT VT = BV->getValueType(0);
19376   SDValue Zext = BV->getOperand(ZextElt);
19377   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19378       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19379       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19380       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19381     return SDValue();
19382
19383   // The zero-extend must be a multiple of the source size, and we must be
19384   // building a vector of the same size as the source of the extract element.
19385   SDValue Extract = Zext.getOperand(0);
19386   unsigned DestSize = Zext.getValueSizeInBits();
19387   unsigned SrcSize = Extract.getValueSizeInBits();
19388   if (DestSize % SrcSize != 0 ||
19389       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19390     return SDValue();
19391
19392   // Create a shuffle mask that will combine the extracted element with zeros
19393   // and undefs.
19394   int ZextRatio = DestSize / SrcSize;
19395   int NumMaskElts = NumBVOps * ZextRatio;
19396   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19397   for (int i = 0; i != NumMaskElts; ++i) {
19398     if (i / ZextRatio == ZextElt) {
19399       // The low bits of the (potentially translated) extracted element map to
19400       // the source vector. The high bits map to zero. We will use a zero vector
19401       // as the 2nd source operand of the shuffle, so use the 1st element of
19402       // that vector (mask value is number-of-elements) for the high bits.
19403       if (i % ZextRatio == 0)
19404         ShufMask[i] = Extract.getConstantOperandVal(1);
19405       else
19406         ShufMask[i] = NumMaskElts;
19407     }
19408
19409     // Undef elements of the build vector remain undef because we initialize
19410     // the shuffle mask with -1.
19411   }
19412
19413   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19414   // bitcast (shuffle V, ZeroVec, VectorMask)
19415   SDLoc DL(BV);
19416   EVT VecVT = Extract.getOperand(0).getValueType();
19417   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19418   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19419   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19420                                              ZeroVec, ShufMask, DAG);
19421   if (!Shuf)
19422     return SDValue();
19423   return DAG.getBitcast(VT, Shuf);
19424 }
19425
// FIXME: promote to STLExtras.
/// Return the zero-based position of the first element of \p Range that
/// compares equal to \p Val. When no element matches, the result is -1
/// converted to the iterator difference type.
template <typename R, typename T>
static auto getFirstIndexOf(R &&Range, const T &Val) {
  auto Pos = find(Range, Val);
  using IndexTy = decltype(std::distance(Range.begin(), Pos));
  return Pos != Range.end() ? std::distance(Range.begin(), Pos)
                            : static_cast<IndexTy>(-1);
}
19434
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If the types of the vectors we're extracting from allow it,
// turn this into a vector_shuffle node.
//
// Every operand of N must be undef, a zero constant (integer or FP), or an
// EXTRACT_VECTOR_ELT with an in-range constant index taken from a
// fixed-width vector whose element type equals the result element type.
// Anything else makes the function give up.
//
// Returns the replacement value (a single shuffle, or the root of a tree of
// blending shuffles), or an empty SDValue when the combine does not apply.
SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // Give the more specialized "single zext'ed extract blended with zero"
  // pattern the first chance.
  if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
    return V;

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  bool UsesZeroVector = false;
  unsigned NumElems = N->getNumOperands();

  // Record, for each element of the newly built vector, which input vector
  // that element comes from. -1 stands for undef, 0 for the zero vector,
  // and positive values for the input vectors.
  // VectorMask maps each element to its vector number, and VecIn maps vector
  // numbers to their initial SDValues.

  SmallVector<int, 8> VectorMask(NumElems, -1);
  SmallVector<SDValue, 8> VecIn;
  // Slot 0 is a placeholder for the implicit zero vector, so real inputs are
  // numbered starting at 1.
  VecIn.push_back(SDValue());

  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef operands keep the initial -1 mask entry.
    if (Op.isUndef())
      continue;

    // See if we can use a blend with a zero vector.
    // TODO: Should we generalize this to a blend with an arbitrary constant
    // vector?
    if (isNullConstant(Op) || isNullFPConstant(Op)) {
      UsesZeroVector = true;
      VectorMask[i] = 0;
      continue;
    }

    // Not an undef or zero. If the input is something other than an
    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    SDValue ExtractedFromVec = Op.getOperand(0);

    // The element-count queries below need a fixed-width source vector.
    if (ExtractedFromVec.getValueType().isScalableVector())
      return SDValue();

    // Reject extracts whose constant index lies past the end of the source.
    const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
      return SDValue();

    // All inputs must have the same element type as the output.
    if (VT.getVectorElementType() !=
        ExtractedFromVec.getValueType().getVectorElementType())
      return SDValue();

    // Have we seen this input vector before?
    // The vectors are expected to be tiny (usually 1 or 2 elements), so using
    // a map back from SDValues to numbers isn't worth it.
    int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
    if (Idx == -1) { // A new source vector?
      Idx = VecIn.size();
      VecIn.push_back(ExtractedFromVec);
    }

    VectorMask[i] = Idx;
  }

  // If we didn't find at least one input vector, bail out.
  // (VecIn always holds the placeholder, so size 1 means "no real input".)
  if (VecIn.size() < 2)
    return SDValue();

  // If all the Operands of BUILD_VECTOR extract from same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
  bool DidSplitVec = false;
  if (VecIn.size() == 2) {
    unsigned MaxIndex = 0;
    unsigned NearestPow2 = 0;
    SDValue Vec = VecIn.back();
    EVT InVT = Vec.getValueType();
    // Extract index used by each result element; entries for undef/zero
    // elements stay 0 and are never read back.
    SmallVector<unsigned, 8> IndexVec(NumElems, 0);

    for (unsigned i = 0; i < NumElems; i++) {
      if (VectorMask[i] <= 0)
        continue;
      unsigned Index = N->getOperand(i).getConstantOperandVal(1);
      IndexVec[i] = Index;
      MaxIndex = std::max(MaxIndex, Index);
    }

    // Only split when the accessed range, rounded up to a power of two, is
    // more than twice as long as the vector being built.
    NearestPow2 = PowerOf2Ceil(MaxIndex);
    if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
        NumElems * 2 < NearestPow2) {
      unsigned SplitSize = NearestPow2 / 2;
      EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
                                     InVT.getVectorElementType(), SplitSize);
      // Both halves must be legal and must fit inside the source vector.
      if (TLI.isTypeLegal(SplitVT) &&
          SplitSize + SplitVT.getVectorNumElements() <=
              InVT.getVectorNumElements()) {
        SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(SplitSize, DL));
        SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
                                     DAG.getVectorIdxConstant(0, DL));
        // Replace the single input by its low half (number 1) and high half
        // (number 2).
        VecIn.pop_back();
        VecIn.push_back(VecIn1);
        VecIn.push_back(VecIn2);
        DidSplitVec = true;

        // Re-point every extracted element at the half that contains it.
        for (unsigned i = 0; i < NumElems; i++) {
          if (VectorMask[i] <= 0)
            continue;
          VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
        }
      }
    }
  }

  // Sort input vectors by decreasing vector element count,
  // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit" zero vector as-is.
  SmallVector<SDValue, 8> SortedVecIn(VecIn);
  llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
                    [](const SDValue &a, const SDValue &b) {
                      return a.getValueType().getVectorNumElements() >
                             b.getValueType().getVectorNumElements();
                    });

  // We now also need to rebuild the VectorMask, because it referenced element
  // order in VecIn, and we just sorted them.
  for (int &SourceVectorIndex : VectorMask) {
    // Undef (-1) and zero-vector (0) entries are not affected by the sort.
    if (SourceVectorIndex <= 0)
      continue;
    unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
    assert(Idx > 0 && Idx < SortedVecIn.size() &&
           VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
    SourceVectorIndex = Idx;
  }

  VecIn = std::move(SortedVecIn);

  // TODO: Should this fire if some of the input vectors has illegal type (like
  // it does now), or should we let legalization run its course first?

  // Shuffle phase:
  // Take pairs of vectors, and shuffle them so that the result has elements
  // from these vectors in the correct places.
  // For example, given:
  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
  // We will generate:
  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
  SmallVector<SDValue, 4> Shuffles;
  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
    // Inputs are paired as (1,2), (3,4), ...; slot 0 is the zero placeholder.
    unsigned LeftIdx = 2 * In + 1;
    SDValue VecLeft = VecIn[LeftIdx];
    // With an odd number of inputs the last pair has no right-hand vector.
    SDValue VecRight =
        (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();

    // If any pair cannot be turned into a shuffle, abandon the whole combine.
    if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
                                                VecRight, LeftIdx, DidSplitVec))
      Shuffles.push_back(Shuffle);
    else
      return SDValue();
  }

  // If we need the zero vector as an "ingredient" in the blend tree, add it
  // to the list of shuffles.
  if (UsesZeroVector)
    Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
                                      : DAG.getConstantFP(0.0, DL, VT));

  // If we only have one shuffle, we're done.
  if (Shuffles.size() == 1)
    return Shuffles[0];

  // Update the vector mask to point to the post-shuffle vectors.
  // The zero vector was appended last; input vector K went into shuffle
  // (K - 1) / 2 because of the pairing above.
  for (int &Vec : VectorMask)
    if (Vec == 0)
      Vec = Shuffles.size() - 1;
    else
      Vec = (Vec - 1) / 2;

  // More than one shuffle. Generate a binary tree of blends, e.g. if from
  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
  // generate:
  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21

  // Make sure the initial size of the shuffle list is even.
  if (Shuffles.size() % 2)
    Shuffles.push_back(DAG.getUNDEF(VT));

  // Each pass blends neighbouring pairs and writes the results back into the
  // front of Shuffles, halving the number of live entries.
  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
    // Pad an odd level with undef. The slot being overwritten is a stale
    // entry left over from the previous (larger) level.
    if (CurSize % 2) {
      Shuffles[CurSize] = DAG.getUNDEF(VT);
      CurSize++;
    }
    for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
      int Left = 2 * In;
      int Right = 2 * In + 1;
      SmallVector<int, 8> Mask(NumElems, -1);
      for (unsigned i = 0; i != NumElems; ++i) {
        // Element i stays in lane i; only the operand it is taken from
        // (first or second shuffle input) differs.
        if (VectorMask[i] == Left) {
          Mask[i] = i;
          VectorMask[i] = In;
        } else if (VectorMask[i] == Right) {
          Mask[i] = i + NumElems;
          VectorMask[i] = In;
        }
      }

      Shuffles[In] =
          DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
    }
  }
  return Shuffles[0];
}
19672
19673 // Try to turn a build vector of zero extends of extract vector elts into a
19674 // a vector zero extend and possibly an extract subvector.
19675 // TODO: Support sign extend?
19676 // TODO: Allow undef elements?
19677 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19678   if (LegalOperations)
19679     return SDValue();
19680
19681   EVT VT = N->getValueType(0);
19682
19683   bool FoundZeroExtend = false;
19684   SDValue Op0 = N->getOperand(0);
19685   auto checkElem = [&](SDValue Op) -> int64_t {
19686     unsigned Opc = Op.getOpcode();
19687     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19688     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19689         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19690         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19691       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19692         return C->getZExtValue();
19693     return -1;
19694   };
19695
19696   // Make sure the first element matches
19697   // (zext (extract_vector_elt X, C))
19698   int64_t Offset = checkElem(Op0);
19699   if (Offset < 0)
19700     return SDValue();
19701
19702   unsigned NumElems = N->getNumOperands();
19703   SDValue In = Op0.getOperand(0).getOperand(0);
19704   EVT InSVT = In.getValueType().getScalarType();
19705   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19706
19707   // Don't create an illegal input type after type legalization.
19708   if (LegalTypes && !TLI.isTypeLegal(InVT))
19709     return SDValue();
19710
19711   // Ensure all the elements come from the same vector and are adjacent.
19712   for (unsigned i = 1; i != NumElems; ++i) {
19713     if ((Offset + i) != checkElem(N->getOperand(i)))
19714       return SDValue();
19715   }
19716
19717   SDLoc DL(N);
19718   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19719                    Op0.getOperand(0).getOperand(1));
19720   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19721                      VT, In);
19722 }
19723
19724 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19725   EVT VT = N->getValueType(0);
19726
19727   // A vector built entirely of undefs is undef.
19728   if (ISD::allOperandsUndef(N))
19729     return DAG.getUNDEF(VT);
19730
19731   // If this is a splat of a bitcast from another vector, change to a
19732   // concat_vector.
19733   // For example:
19734   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19735   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19736   //
19737   // If X is a build_vector itself, the concat can become a larger build_vector.
19738   // TODO: Maybe this is useful for non-splat too?
19739   if (!LegalOperations) {
19740     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19741       Splat = peekThroughBitcasts(Splat);
19742       EVT SrcVT = Splat.getValueType();
19743       if (SrcVT.isVector()) {
19744         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19745         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19746                                      SrcVT.getVectorElementType(), NumElts);
19747         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19748           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19749           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19750                                        NewVT, Ops);
19751           return DAG.getBitcast(VT, Concat);
19752         }
19753       }
19754     }
19755   }
19756
19757   // Check if we can express BUILD VECTOR via subvector extract.
19758   if (!LegalTypes && (N->getNumOperands() > 1)) {
19759     SDValue Op0 = N->getOperand(0);
19760     auto checkElem = [&](SDValue Op) -> uint64_t {
19761       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19762           (Op0.getOperand(0) == Op.getOperand(0)))
19763         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19764           return CNode->getZExtValue();
19765       return -1;
19766     };
19767
19768     int Offset = checkElem(Op0);
19769     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19770       if (Offset + i != checkElem(N->getOperand(i))) {
19771         Offset = -1;
19772         break;
19773       }
19774     }
19775
19776     if ((Offset == 0) &&
19777         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19778       return Op0.getOperand(0);
19779     if ((Offset != -1) &&
19780         ((Offset % N->getValueType(0).getVectorNumElements()) ==
19781          0)) // IDX must be multiple of output size.
19782       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19783                          Op0.getOperand(0), Op0.getOperand(1));
19784   }
19785
19786   if (SDValue V = convertBuildVecZextToZext(N))
19787     return V;
19788
19789   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19790     return V;
19791
19792   if (SDValue V = reduceBuildVecTruncToBitCast(N))
19793     return V;
19794
19795   if (SDValue V = reduceBuildVecToShuffle(N))
19796     return V;
19797
19798   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19799   // Do this late as some of the above may replace the splat.
19800   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19801     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19802       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19803       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19804     }
19805
19806   return SDValue();
19807 }
19808
19809 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19810   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19811   EVT OpVT = N->getOperand(0).getValueType();
19812
19813   // If the operands are legal vectors, leave them alone.
19814   if (TLI.isTypeLegal(OpVT))
19815     return SDValue();
19816
19817   SDLoc DL(N);
19818   EVT VT = N->getValueType(0);
19819   SmallVector<SDValue, 8> Ops;
19820
19821   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19822   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19823
19824   // Keep track of what we encounter.
19825   bool AnyInteger = false;
19826   bool AnyFP = false;
19827   for (const SDValue &Op : N->ops()) {
19828     if (ISD::BITCAST == Op.getOpcode() &&
19829         !Op.getOperand(0).getValueType().isVector())
19830       Ops.push_back(Op.getOperand(0));
19831     else if (ISD::UNDEF == Op.getOpcode())
19832       Ops.push_back(ScalarUndef);
19833     else
19834       return SDValue();
19835
19836     // Note whether we encounter an integer or floating point scalar.
19837     // If it's neither, bail out, it could be something weird like x86mmx.
19838     EVT LastOpVT = Ops.back().getValueType();
19839     if (LastOpVT.isFloatingPoint())
19840       AnyFP = true;
19841     else if (LastOpVT.isInteger())
19842       AnyInteger = true;
19843     else
19844       return SDValue();
19845   }
19846
19847   // If any of the operands is a floating point scalar bitcast to a vector,
19848   // use floating point types throughout, and bitcast everything.
19849   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19850   if (AnyFP) {
19851     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19852     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19853     if (AnyInteger) {
19854       for (SDValue &Op : Ops) {
19855         if (Op.getValueType() == SVT)
19856           continue;
19857         if (Op.isUndef())
19858           Op = ScalarUndef;
19859         else
19860           Op = DAG.getBitcast(SVT, Op);
19861       }
19862     }
19863   }
19864
19865   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19866                                VT.getSizeInBits() / SVT.getSizeInBits());
19867   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19868 }
19869
19870 // Attempt to merge nested concat_vectors/undefs.
19871 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
19872 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
19873 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
19874                                                   SelectionDAG &DAG) {
19875   EVT VT = N->getValueType(0);
19876
19877   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
19878   EVT SubVT;
19879   SDValue FirstConcat;
19880   for (const SDValue &Op : N->ops()) {
19881     if (Op.isUndef())
19882       continue;
19883     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
19884       return SDValue();
19885     if (!FirstConcat) {
19886       SubVT = Op.getOperand(0).getValueType();
19887       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
19888         return SDValue();
19889       FirstConcat = Op;
19890       continue;
19891     }
19892     if (SubVT != Op.getOperand(0).getValueType())
19893       return SDValue();
19894   }
19895   assert(FirstConcat && "Concat of all-undefs found");
19896
19897   SmallVector<SDValue> ConcatOps;
19898   for (const SDValue &Op : N->ops()) {
19899     if (Op.isUndef()) {
19900       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
19901       continue;
19902     }
19903     ConcatOps.append(Op->op_begin(), Op->op_end());
19904   }
19905   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
19906 }
19907
19908 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19909 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19910 // most two distinct vectors the same size as the result, attempt to turn this
19911 // into a legal shuffle.
19912 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19913   EVT VT = N->getValueType(0);
19914   EVT OpVT = N->getOperand(0).getValueType();
19915
19916   // We currently can't generate an appropriate shuffle for a scalable vector.
19917   if (VT.isScalableVector())
19918     return SDValue();
19919
19920   int NumElts = VT.getVectorNumElements();
19921   int NumOpElts = OpVT.getVectorNumElements();
19922
19923   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19924   SmallVector<int, 8> Mask;
19925
19926   for (SDValue Op : N->ops()) {
19927     Op = peekThroughBitcasts(Op);
19928
19929     // UNDEF nodes convert to UNDEF shuffle mask values.
19930     if (Op.isUndef()) {
19931       Mask.append((unsigned)NumOpElts, -1);
19932       continue;
19933     }
19934
19935     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19936       return SDValue();
19937
19938     // What vector are we extracting the subvector from and at what index?
19939     SDValue ExtVec = Op.getOperand(0);
19940     int ExtIdx = Op.getConstantOperandVal(1);
19941
19942     // We want the EVT of the original extraction to correctly scale the
19943     // extraction index.
19944     EVT ExtVT = ExtVec.getValueType();
19945     ExtVec = peekThroughBitcasts(ExtVec);
19946
19947     // UNDEF nodes convert to UNDEF shuffle mask values.
19948     if (ExtVec.isUndef()) {
19949       Mask.append((unsigned)NumOpElts, -1);
19950       continue;
19951     }
19952
19953     // Ensure that we are extracting a subvector from a vector the same
19954     // size as the result.
19955     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19956       return SDValue();
19957
19958     // Scale the subvector index to account for any bitcast.
19959     int NumExtElts = ExtVT.getVectorNumElements();
19960     if (0 == (NumExtElts % NumElts))
19961       ExtIdx /= (NumExtElts / NumElts);
19962     else if (0 == (NumElts % NumExtElts))
19963       ExtIdx *= (NumElts / NumExtElts);
19964     else
19965       return SDValue();
19966
19967     // At most we can reference 2 inputs in the final shuffle.
19968     if (SV0.isUndef() || SV0 == ExtVec) {
19969       SV0 = ExtVec;
19970       for (int i = 0; i != NumOpElts; ++i)
19971         Mask.push_back(i + ExtIdx);
19972     } else if (SV1.isUndef() || SV1 == ExtVec) {
19973       SV1 = ExtVec;
19974       for (int i = 0; i != NumOpElts; ++i)
19975         Mask.push_back(i + ExtIdx + NumElts);
19976     } else {
19977       return SDValue();
19978     }
19979   }
19980
19981   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19982   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19983                                      DAG.getBitcast(VT, SV1), Mask, DAG);
19984 }
19985
19986 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19987   unsigned CastOpcode = N->getOperand(0).getOpcode();
19988   switch (CastOpcode) {
19989   case ISD::SINT_TO_FP:
19990   case ISD::UINT_TO_FP:
19991   case ISD::FP_TO_SINT:
19992   case ISD::FP_TO_UINT:
19993     // TODO: Allow more opcodes?
19994     //  case ISD::BITCAST:
19995     //  case ISD::TRUNCATE:
19996     //  case ISD::ZERO_EXTEND:
19997     //  case ISD::SIGN_EXTEND:
19998     //  case ISD::FP_EXTEND:
19999     break;
20000   default:
20001     return SDValue();
20002   }
20003
20004   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20005   if (!SrcVT.isVector())
20006     return SDValue();
20007
20008   // All operands of the concat must be the same kind of cast from the same
20009   // source type.
20010   SmallVector<SDValue, 4> SrcOps;
20011   for (SDValue Op : N->ops()) {
20012     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20013         Op.getOperand(0).getValueType() != SrcVT)
20014       return SDValue();
20015     SrcOps.push_back(Op.getOperand(0));
20016   }
20017
20018   // The wider cast must be supported by the target. This is unusual because
20019   // the operation support type parameter depends on the opcode. In addition,
20020   // check the other type in the cast to make sure this is really legal.
20021   EVT VT = N->getValueType(0);
20022   EVT SrcEltVT = SrcVT.getVectorElementType();
20023   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20024   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20025   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20026   switch (CastOpcode) {
20027   case ISD::SINT_TO_FP:
20028   case ISD::UINT_TO_FP:
20029     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20030         !TLI.isTypeLegal(VT))
20031       return SDValue();
20032     break;
20033   case ISD::FP_TO_SINT:
20034   case ISD::FP_TO_UINT:
20035     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20036         !TLI.isTypeLegal(ConcatSrcVT))
20037       return SDValue();
20038     break;
20039   default:
20040     llvm_unreachable("Unexpected cast opcode");
20041   }
20042
20043   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20044   SDLoc DL(N);
20045   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20046   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20047 }
20048
20049 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20050   // If we only have one input vector, we don't need to do any concatenation.
20051   if (N->getNumOperands() == 1)
20052     return N->getOperand(0);
20053
20054   // Check if all of the operands are undefs.
20055   EVT VT = N->getValueType(0);
20056   if (ISD::allOperandsUndef(N))
20057     return DAG.getUNDEF(VT);
20058
20059   // Optimize concat_vectors where all but the first of the vectors are undef.
20060   if (all_of(drop_begin(N->ops()),
20061              [](const SDValue &Op) { return Op.isUndef(); })) {
20062     SDValue In = N->getOperand(0);
20063     assert(In.getValueType().isVector() && "Must concat vectors");
20064
20065     // If the input is a concat_vectors, just make a larger concat by padding
20066     // with smaller undefs.
20067     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20068       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20069       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20070       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20071       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20072     }
20073
20074     SDValue Scalar = peekThroughOneUseBitcasts(In);
20075
20076     // concat_vectors(scalar_to_vector(scalar), undef) ->
20077     //     scalar_to_vector(scalar)
20078     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20079          Scalar.hasOneUse()) {
20080       EVT SVT = Scalar.getValueType().getVectorElementType();
20081       if (SVT == Scalar.getOperand(0).getValueType())
20082         Scalar = Scalar.getOperand(0);
20083     }
20084
20085     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20086     if (!Scalar.getValueType().isVector()) {
20087       // If the bitcast type isn't legal, it might be a trunc of a legal type;
20088       // look through the trunc so we can still do the transform:
20089       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20090       if (Scalar->getOpcode() == ISD::TRUNCATE &&
20091           !TLI.isTypeLegal(Scalar.getValueType()) &&
20092           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20093         Scalar = Scalar->getOperand(0);
20094
20095       EVT SclTy = Scalar.getValueType();
20096
20097       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20098         return SDValue();
20099
20100       // Bail out if the vector size is not a multiple of the scalar size.
20101       if (VT.getSizeInBits() % SclTy.getSizeInBits())
20102         return SDValue();
20103
20104       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20105       if (VNTNumElms < 2)
20106         return SDValue();
20107
20108       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20109       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20110         return SDValue();
20111
20112       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20113       return DAG.getBitcast(VT, Res);
20114     }
20115   }
20116
20117   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20118   // We have already tested above for an UNDEF only concatenation.
20119   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20120   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20121   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20122     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20123   };
20124   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20125     SmallVector<SDValue, 8> Opnds;
20126     EVT SVT = VT.getScalarType();
20127
20128     EVT MinVT = SVT;
20129     if (!SVT.isFloatingPoint()) {
20130       // If BUILD_VECTOR are from built from integer, they may have different
20131       // operand types. Get the smallest type and truncate all operands to it.
20132       bool FoundMinVT = false;
20133       for (const SDValue &Op : N->ops())
20134         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20135           EVT OpSVT = Op.getOperand(0).getValueType();
20136           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20137           FoundMinVT = true;
20138         }
20139       assert(FoundMinVT && "Concat vector type mismatch");
20140     }
20141
20142     for (const SDValue &Op : N->ops()) {
20143       EVT OpVT = Op.getValueType();
20144       unsigned NumElts = OpVT.getVectorNumElements();
20145
20146       if (ISD::UNDEF == Op.getOpcode())
20147         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20148
20149       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20150         if (SVT.isFloatingPoint()) {
20151           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20152           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20153         } else {
20154           for (unsigned i = 0; i != NumElts; ++i)
20155             Opnds.push_back(
20156                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20157         }
20158       }
20159     }
20160
20161     assert(VT.getVectorNumElements() == Opnds.size() &&
20162            "Concat vector type mismatch");
20163     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20164   }
20165
20166   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20167   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20168   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20169     return V;
20170
20171   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20172     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20173     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20174       return V;
20175
20176     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20177     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20178       return V;
20179   }
20180
20181   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20182     return V;
20183
20184   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
20185   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
20186   // operands and look for a CONCAT operations that place the incoming vectors
20187   // at the exact same location.
20188   //
20189   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
20190   SDValue SingleSource = SDValue();
20191   unsigned PartNumElem =
20192       N->getOperand(0).getValueType().getVectorMinNumElements();
20193
20194   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20195     SDValue Op = N->getOperand(i);
20196
20197     if (Op.isUndef())
20198       continue;
20199
20200     // Check if this is the identity extract:
20201     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20202       return SDValue();
20203
20204     // Find the single incoming vector for the extract_subvector.
20205     if (SingleSource.getNode()) {
20206       if (Op.getOperand(0) != SingleSource)
20207         return SDValue();
20208     } else {
20209       SingleSource = Op.getOperand(0);
20210
20211       // Check the source type is the same as the type of the result.
20212       // If not, this concat may extend the vector, so we can not
20213       // optimize it away.
20214       if (SingleSource.getValueType() != N->getValueType(0))
20215         return SDValue();
20216     }
20217
20218     // Check that we are reading from the identity index.
20219     unsigned IdentityIndex = i * PartNumElem;
20220     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20221       return SDValue();
20222   }
20223
20224   if (SingleSource.getNode())
20225     return SingleSource;
20226
20227   return SDValue();
20228 }
20229
20230 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20231 // if the subvector can be sourced for free.
20232 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20233   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20234       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20235     return V.getOperand(1);
20236   }
20237   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20238   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20239       V.getOperand(0).getValueType() == SubVT &&
20240       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20241     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20242     return V.getOperand(SubIdx);
20243   }
20244   return SDValue();
20245 }
20246
20247 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20248                                               SelectionDAG &DAG,
20249                                               bool LegalOperations) {
20250   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20251   SDValue BinOp = Extract->getOperand(0);
20252   unsigned BinOpcode = BinOp.getOpcode();
20253   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
20254     return SDValue();
20255
20256   EVT VecVT = BinOp.getValueType();
20257   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20258   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20259     return SDValue();
20260
20261   SDValue Index = Extract->getOperand(1);
20262   EVT SubVT = Extract->getValueType(0);
20263   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20264     return SDValue();
20265
20266   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20267   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20268
20269   // TODO: We could handle the case where only 1 operand is being inserted by
20270   //       creating an extract of the other operand, but that requires checking
20271   //       number of uses and/or costs.
20272   if (!Sub0 || !Sub1)
20273     return SDValue();
20274
20275   // We are inserting both operands of the wide binop only to extract back
20276   // to the narrow vector size. Eliminate all of the insert/extract:
20277   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20278   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20279                      BinOp->getFlags());
20280 }
20281
20282 /// If we are extracting a subvector produced by a wide binary operator try
20283 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20284 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20285                                           bool LegalOperations) {
20286   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20287   // some of these bailouts with other transforms.
20288
20289   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20290     return V;
20291
20292   // The extract index must be a constant, so we can map it to a concat operand.
20293   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20294   if (!ExtractIndexC)
20295     return SDValue();
20296
20297   // We are looking for an optionally bitcasted wide vector binary operator
20298   // feeding an extract subvector.
20299   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20300   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20301   unsigned BOpcode = BinOp.getOpcode();
20302   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20303     return SDValue();
20304
20305   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20306   // reduced to the unary fneg when it is visited, and we probably want to deal
20307   // with fneg in a target-specific way.
20308   if (BOpcode == ISD::FSUB) {
20309     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20310     if (C && C->getValueAPF().isNegZero())
20311       return SDValue();
20312   }
20313
20314   // The binop must be a vector type, so we can extract some fraction of it.
20315   EVT WideBVT = BinOp.getValueType();
20316   // The optimisations below currently assume we are dealing with fixed length
20317   // vectors. It is possible to add support for scalable vectors, but at the
20318   // moment we've done no analysis to prove whether they are profitable or not.
20319   if (!WideBVT.isFixedLengthVector())
20320     return SDValue();
20321
20322   EVT VT = Extract->getValueType(0);
20323   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20324   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20325          "Extract index is not a multiple of the vector length.");
20326
20327   // Bail out if this is not a proper multiple width extraction.
20328   unsigned WideWidth = WideBVT.getSizeInBits();
20329   unsigned NarrowWidth = VT.getSizeInBits();
20330   if (WideWidth % NarrowWidth != 0)
20331     return SDValue();
20332
20333   // Bail out if we are extracting a fraction of a single operation. This can
20334   // occur because we potentially looked through a bitcast of the binop.
20335   unsigned NarrowingRatio = WideWidth / NarrowWidth;
20336   unsigned WideNumElts = WideBVT.getVectorNumElements();
20337   if (WideNumElts % NarrowingRatio != 0)
20338     return SDValue();
20339
20340   // Bail out if the target does not support a narrower version of the binop.
20341   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20342                                    WideNumElts / NarrowingRatio);
20343   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20344     return SDValue();
20345
20346   // If extraction is cheap, we don't need to look at the binop operands
20347   // for concat ops. The narrow binop alone makes this transform profitable.
20348   // We can't just reuse the original extract index operand because we may have
20349   // bitcasted.
20350   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20351   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20352   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20353       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20354     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20355     SDLoc DL(Extract);
20356     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20357     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20358                             BinOp.getOperand(0), NewExtIndex);
20359     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20360                             BinOp.getOperand(1), NewExtIndex);
20361     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20362                                       BinOp.getNode()->getFlags());
20363     return DAG.getBitcast(VT, NarrowBinOp);
20364   }
20365
20366   // Only handle the case where we are doubling and then halving. A larger ratio
20367   // may require more than two narrow binops to replace the wide binop.
20368   if (NarrowingRatio != 2)
20369     return SDValue();
20370
20371   // TODO: The motivating case for this transform is an x86 AVX1 target. That
20372   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20373   // flavors, but no other 256-bit integer support. This could be extended to
20374   // handle any binop, but that may require fixing/adding other folds to avoid
20375   // codegen regressions.
20376   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20377     return SDValue();
20378
20379   // We need at least one concatenation operation of a binop operand to make
20380   // this transform worthwhile. The concat must double the input vector sizes.
20381   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20382     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20383       return V.getOperand(ConcatOpNum);
20384     return SDValue();
20385   };
20386   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20387   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20388
20389   if (SubVecL || SubVecR) {
20390     // If a binop operand was not the result of a concat, we must extract a
20391     // half-sized operand for our new narrow binop:
20392     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20393     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20394     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20395     SDLoc DL(Extract);
20396     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20397     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20398                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20399                                       BinOp.getOperand(0), IndexC);
20400
20401     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20402                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20403                                       BinOp.getOperand(1), IndexC);
20404
20405     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20406     return DAG.getBitcast(VT, NarrowBinOp);
20407   }
20408
20409   return SDValue();
20410 }
20411
20412 /// If we are extracting a subvector from a wide vector load, convert to a
20413 /// narrow load to eliminate the extraction:
20414 /// (extract_subvector (load wide vector)) --> (load narrow vector)
20415 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20416   // TODO: Add support for big-endian. The offset calculation must be adjusted.
20417   if (DAG.getDataLayout().isBigEndian())
20418     return SDValue();
20419
20420   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20421   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20422   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
20423       !ExtIdx)
20424     return SDValue();
20425
20426   // Allow targets to opt-out.
20427   EVT VT = Extract->getValueType(0);
20428
20429   // We can only create byte sized loads.
20430   if (!VT.isByteSized())
20431     return SDValue();
20432
20433   unsigned Index = ExtIdx->getZExtValue();
20434   unsigned NumElts = VT.getVectorMinNumElements();
20435
20436   // The definition of EXTRACT_SUBVECTOR states that the index must be a
20437   // multiple of the minimum number of elements in the result type.
20438   assert(Index % NumElts == 0 && "The extract subvector index is not a "
20439                                  "multiple of the result's element count");
20440
20441   // It's fine to use TypeSize here as we know the offset will not be negative.
20442   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
20443
20444   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20445   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20446     return SDValue();
20447
20448   // The narrow load will be offset from the base address of the old load if
20449   // we are extracting from something besides index 0 (little-endian).
20450   SDLoc DL(Extract);
20451
20452   // TODO: Use "BaseIndexOffset" to make this more effective.
20453   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20454
20455   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20456   MachineFunction &MF = DAG.getMachineFunction();
20457   MachineMemOperand *MMO;
20458   if (Offset.isScalable()) {
20459     MachinePointerInfo MPI =
20460         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20461     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20462   } else
20463     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20464                                   StoreSize);
20465
20466   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20467   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20468   return NewLd;
20469 }
20470
20471 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20472   EVT NVT = N->getValueType(0);
20473   SDValue V = N->getOperand(0);
20474   uint64_t ExtIdx = N->getConstantOperandVal(1);
20475
20476   // Extract from UNDEF is UNDEF.
20477   if (V.isUndef())
20478     return DAG.getUNDEF(NVT);
20479
20480   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20481     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20482       return NarrowLoad;
20483
20484   // Combine an extract of an extract into a single extract_subvector.
20485   // ext (ext X, C), 0 --> ext X, C
20486   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20487     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20488                                     V.getConstantOperandVal(1)) &&
20489         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20490       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20491                          V.getOperand(1));
20492     }
20493   }
20494
20495   // Try to move vector bitcast after extract_subv by scaling extraction index:
20496   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
20497   if (V.getOpcode() == ISD::BITCAST &&
20498       V.getOperand(0).getValueType().isVector() &&
20499       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
20500     SDValue SrcOp = V.getOperand(0);
20501     EVT SrcVT = SrcOp.getValueType();
20502     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20503     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20504     if ((SrcNumElts % DestNumElts) == 0) {
20505       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20506       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20507       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20508                                       NewExtEC);
20509       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20510         SDLoc DL(N);
20511         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20512         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20513                                          V.getOperand(0), NewIndex);
20514         return DAG.getBitcast(NVT, NewExtract);
20515       }
20516     }
20517     if ((DestNumElts % SrcNumElts) == 0) {
20518       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20519       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20520         ElementCount NewExtEC =
20521             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20522         EVT ScalarVT = SrcVT.getScalarType();
20523         if ((ExtIdx % DestSrcRatio) == 0) {
20524           SDLoc DL(N);
20525           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20526           EVT NewExtVT =
20527               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20528           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20529             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20530             SDValue NewExtract =
20531                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20532                             V.getOperand(0), NewIndex);
20533             return DAG.getBitcast(NVT, NewExtract);
20534           }
20535           if (NewExtEC.isScalar() &&
20536               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20537             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20538             SDValue NewExtract =
20539                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20540                             V.getOperand(0), NewIndex);
20541             return DAG.getBitcast(NVT, NewExtract);
20542           }
20543         }
20544       }
20545     }
20546   }
20547
20548   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20549     unsigned ExtNumElts = NVT.getVectorMinNumElements();
20550     EVT ConcatSrcVT = V.getOperand(0).getValueType();
20551     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20552            "Concat and extract subvector do not change element type");
20553     assert((ExtIdx % ExtNumElts) == 0 &&
20554            "Extract index is not a multiple of the input vector length.");
20555
20556     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20557     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20558
20559     // If the concatenated source types match this extract, it's a direct
20560     // simplification:
20561     // extract_subvec (concat V1, V2, ...), i --> Vi
20562     if (ConcatSrcNumElts == ExtNumElts)
20563       return V.getOperand(ConcatOpIdx);
20564
20565     // If the concatenated source vectors are a multiple length of this extract,
20566     // then extract a fraction of one of those source vectors directly from a
20567     // concat operand. Example:
20568     //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
20569     //   v2i8 extract_subvec v8i8 Y, 6
20570     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20571       SDLoc DL(N);
20572       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20573       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20574              "Trying to extract from >1 concat operand?");
20575       assert(NewExtIdx % ExtNumElts == 0 &&
20576              "Extract index is not a multiple of the input vector length.");
20577       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20578       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20579                          V.getOperand(ConcatOpIdx), NewIndexC);
20580     }
20581   }
20582
20583   V = peekThroughBitcasts(V);
20584
20585   // If the input is a build vector. Try to make a smaller build vector.
20586   if (V.getOpcode() == ISD::BUILD_VECTOR) {
20587     EVT InVT = V.getValueType();
20588     unsigned ExtractSize = NVT.getSizeInBits();
20589     unsigned EltSize = InVT.getScalarSizeInBits();
20590     // Only do this if we won't split any elements.
20591     if (ExtractSize % EltSize == 0) {
20592       unsigned NumElems = ExtractSize / EltSize;
20593       EVT EltVT = InVT.getVectorElementType();
20594       EVT ExtractVT =
20595           NumElems == 1 ? EltVT
20596                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20597       if ((Level < AfterLegalizeDAG ||
20598            (NumElems == 1 ||
20599             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20600           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20601         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20602
20603         if (NumElems == 1) {
20604           SDValue Src = V->getOperand(IdxVal);
20605           if (EltVT != Src.getValueType())
20606             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
20607           return DAG.getBitcast(NVT, Src);
20608         }
20609
20610         // Extract the pieces from the original build_vector.
20611         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20612                                               V->ops().slice(IdxVal, NumElems));
20613         return DAG.getBitcast(NVT, BuildVec);
20614       }
20615     }
20616   }
20617
20618   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20619     // Handle only simple case where vector being inserted and vector
20620     // being extracted are of same size.
20621     EVT SmallVT = V.getOperand(1).getValueType();
20622     if (!NVT.bitsEq(SmallVT))
20623       return SDValue();
20624
20625     // Combine:
20626     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20627     // Into:
20628     //    indices are equal or bit offsets are equal => V1
20629     //    otherwise => (extract_subvec V1, ExtIdx)
20630     uint64_t InsIdx = V.getConstantOperandVal(2);
20631     if (InsIdx * SmallVT.getScalarSizeInBits() ==
20632         ExtIdx * NVT.getScalarSizeInBits()) {
20633       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
20634         return SDValue();
20635
20636       return DAG.getBitcast(NVT, V.getOperand(1));
20637     }
20638     return DAG.getNode(
20639         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20640         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20641         N->getOperand(1));
20642   }
20643
20644   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20645     return NarrowBOp;
20646
20647   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20648     return SDValue(N, 0);
20649
20650   return SDValue();
20651 }
20652
20653 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20654 /// followed by concatenation. Narrow vector ops may have better performance
20655 /// than wide ops, and this can unlock further narrowing of other vector ops.
20656 /// Targets can invert this transform later if it is not profitable.
20657 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20658                                          SelectionDAG &DAG) {
20659   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20660   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20661       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20662       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20663     return SDValue();
20664
20665   // Split the wide shuffle mask into halves. Any mask element that is accessing
20666   // operand 1 is offset down to account for narrowing of the vectors.
20667   ArrayRef<int> Mask = Shuf->getMask();
20668   EVT VT = Shuf->getValueType(0);
20669   unsigned NumElts = VT.getVectorNumElements();
20670   unsigned HalfNumElts = NumElts / 2;
20671   SmallVector<int, 16> Mask0(HalfNumElts, -1);
20672   SmallVector<int, 16> Mask1(HalfNumElts, -1);
20673   for (unsigned i = 0; i != NumElts; ++i) {
20674     if (Mask[i] == -1)
20675       continue;
20676     // If we reference the upper (undef) subvector then the element is undef.
20677     if ((Mask[i] % NumElts) >= HalfNumElts)
20678       continue;
20679     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20680     if (i < HalfNumElts)
20681       Mask0[i] = M;
20682     else
20683       Mask1[i - HalfNumElts] = M;
20684   }
20685
20686   // Ask the target if this is a valid transform.
20687   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20688   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20689                                 HalfNumElts);
20690   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20691       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20692     return SDValue();
20693
20694   // shuffle (concat X, undef), (concat Y, undef), Mask -->
20695   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20696   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20697   SDLoc DL(Shuf);
20698   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20699   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20700   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20701 }
20702
20703 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
20704 // or turn a shuffle of a single concat into simpler shuffle then concat.
20705 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20706   EVT VT = N->getValueType(0);
20707   unsigned NumElts = VT.getVectorNumElements();
20708
20709   SDValue N0 = N->getOperand(0);
20710   SDValue N1 = N->getOperand(1);
20711   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20712   ArrayRef<int> Mask = SVN->getMask();
20713
20714   SmallVector<SDValue, 4> Ops;
20715   EVT ConcatVT = N0.getOperand(0).getValueType();
20716   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20717   unsigned NumConcats = NumElts / NumElemsPerConcat;
20718
20719   auto IsUndefMaskElt = [](int i) { return i == -1; };
20720
20721   // Special case: shuffle(concat(A,B)) can be more efficiently represented
20722   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20723   // half vector elements.
20724   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20725       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20726                    IsUndefMaskElt)) {
20727     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20728                               N0.getOperand(1),
20729                               Mask.slice(0, NumElemsPerConcat));
20730     N1 = DAG.getUNDEF(ConcatVT);
20731     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20732   }
20733
20734   // Look at every vector that's inserted. We're looking for exact
20735   // subvector-sized copies from a concatenated vector
20736   for (unsigned I = 0; I != NumConcats; ++I) {
20737     unsigned Begin = I * NumElemsPerConcat;
20738     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20739
20740     // Make sure we're dealing with a copy.
20741     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20742       Ops.push_back(DAG.getUNDEF(ConcatVT));
20743       continue;
20744     }
20745
20746     int OpIdx = -1;
20747     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20748       if (IsUndefMaskElt(SubMask[i]))
20749         continue;
20750       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20751         return SDValue();
20752       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20753       if (0 <= OpIdx && EltOpIdx != OpIdx)
20754         return SDValue();
20755       OpIdx = EltOpIdx;
20756     }
20757     assert(0 <= OpIdx && "Unknown concat_vectors op");
20758
20759     if (OpIdx < (int)N0.getNumOperands())
20760       Ops.push_back(N0.getOperand(OpIdx));
20761     else
20762       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20763   }
20764
20765   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20766 }
20767
20768 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20769 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20770 //
20771 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20772 // a simplification in some sense, but it isn't appropriate in general: some
20773 // BUILD_VECTORs are substantially cheaper than others. The general case
20774 // of a BUILD_VECTOR requires inserting each element individually (or
20775 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20776 // all constants is a single constant pool load.  A BUILD_VECTOR where each
20777 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
20778 // are undef lowers to a small number of element insertions.
20779 //
20780 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20781 // We don't fold shuffles where one side is a non-zero constant, and we don't
20782 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20783 // non-constant operands. This seems to work out reasonably well in practice.
20784 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20785                                        SelectionDAG &DAG,
20786                                        const TargetLowering &TLI) {
20787   EVT VT = SVN->getValueType(0);
20788   unsigned NumElts = VT.getVectorNumElements();
20789   SDValue N0 = SVN->getOperand(0);
20790   SDValue N1 = SVN->getOperand(1);
20791
20792   if (!N0->hasOneUse())
20793     return SDValue();
20794
20795   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
20796   // discussed above.
20797   if (!N1.isUndef()) {
20798     if (!N1->hasOneUse())
20799       return SDValue();
20800
20801     bool N0AnyConst = isAnyConstantBuildVector(N0);
20802     bool N1AnyConst = isAnyConstantBuildVector(N1);
20803     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20804       return SDValue();
20805     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20806       return SDValue();
20807   }
20808
20809   // If both inputs are splats of the same value then we can safely merge this
20810   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20811   bool IsSplat = false;
20812   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20813   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20814   if (BV0 && BV1)
20815     if (SDValue Splat0 = BV0->getSplatValue())
20816       IsSplat = (Splat0 == BV1->getSplatValue());
20817
20818   SmallVector<SDValue, 8> Ops;
20819   SmallSet<SDValue, 16> DuplicateOps;
20820   for (int M : SVN->getMask()) {
20821     SDValue Op = DAG.getUNDEF(VT.getScalarType());
20822     if (M >= 0) {
20823       int Idx = M < (int)NumElts ? M : M - NumElts;
20824       SDValue &S = (M < (int)NumElts ? N0 : N1);
20825       if (S.getOpcode() == ISD::BUILD_VECTOR) {
20826         Op = S.getOperand(Idx);
20827       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20828         SDValue Op0 = S.getOperand(0);
20829         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20830       } else {
20831         // Operand can't be combined - bail out.
20832         return SDValue();
20833       }
20834     }
20835
20836     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20837     // generating a splat; semantically, this is fine, but it's likely to
20838     // generate low-quality code if the target can't reconstruct an appropriate
20839     // shuffle.
20840     if (!Op.isUndef() && !isIntOrFPConstant(Op))
20841       if (!IsSplat && !DuplicateOps.insert(Op).second)
20842         return SDValue();
20843
20844     Ops.push_back(Op);
20845   }
20846
20847   // BUILD_VECTOR requires all inputs to be of the same type, find the
20848   // maximum type and extend them all.
20849   EVT SVT = VT.getScalarType();
20850   if (SVT.isInteger())
20851     for (SDValue &Op : Ops)
20852       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20853   if (SVT != VT.getScalarType())
20854     for (SDValue &Op : Ops)
20855       Op = TLI.isZExtFree(Op.getValueType(), SVT)
20856                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20857                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20858   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20859 }
20860
20861 // Match shuffles that can be converted to any_vector_extend_in_reg.
20862 // This is often generated during legalization.
20863 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20864 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20865 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20866                                             SelectionDAG &DAG,
20867                                             const TargetLowering &TLI,
20868                                             bool LegalOperations) {
20869   EVT VT = SVN->getValueType(0);
20870   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20871
20872   // TODO Add support for big-endian when we have a test case.
20873   if (!VT.isInteger() || IsBigEndian)
20874     return SDValue();
20875
20876   unsigned NumElts = VT.getVectorNumElements();
20877   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20878   ArrayRef<int> Mask = SVN->getMask();
20879   SDValue N0 = SVN->getOperand(0);
20880
20881   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20882   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20883     for (unsigned i = 0; i != NumElts; ++i) {
20884       if (Mask[i] < 0)
20885         continue;
20886       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20887         continue;
20888       return false;
20889     }
20890     return true;
20891   };
20892
20893   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
20894   // power-of-2 extensions as they are the most likely.
20895   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20896     // Check for non power of 2 vector sizes
20897     if (NumElts % Scale != 0)
20898       continue;
20899     if (!isAnyExtend(Scale))
20900       continue;
20901
20902     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20903     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20904     // Never create an illegal type. Only create unsupported operations if we
20905     // are pre-legalization.
20906     if (TLI.isTypeLegal(OutVT))
20907       if (!LegalOperations ||
20908           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20909         return DAG.getBitcast(VT,
20910                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20911                                           SDLoc(SVN), OutVT, N0));
20912   }
20913
20914   return SDValue();
20915 }
20916
20917 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20918 // each source element of a large type into the lowest elements of a smaller
20919 // destination type. This is often generated during legalization.
20920 // If the source node itself was a '*_extend_vector_inreg' node then we should
20921 // then be able to remove it.
20922 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20923                                         SelectionDAG &DAG) {
20924   EVT VT = SVN->getValueType(0);
20925   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20926
20927   // TODO Add support for big-endian when we have a test case.
20928   if (!VT.isInteger() || IsBigEndian)
20929     return SDValue();
20930
20931   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20932
20933   unsigned Opcode = N0.getOpcode();
20934   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20935       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20936       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20937     return SDValue();
20938
20939   SDValue N00 = N0.getOperand(0);
20940   ArrayRef<int> Mask = SVN->getMask();
20941   unsigned NumElts = VT.getVectorNumElements();
20942   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20943   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20944   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20945
20946   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20947     return SDValue();
20948   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20949
20950   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
20951   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20952   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20953   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20954     for (unsigned i = 0; i != NumElts; ++i) {
20955       if (Mask[i] < 0)
20956         continue;
20957       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20958         continue;
20959       return false;
20960     }
20961     return true;
20962   };
20963
20964   // At the moment we just handle the case where we've truncated back to the
20965   // same size as before the extension.
20966   // TODO: handle more extension/truncation cases as cases arise.
20967   if (EltSizeInBits != ExtSrcSizeInBits)
20968     return SDValue();
20969
20970   // We can remove *extend_vector_inreg only if the truncation happens at
20971   // the same scale as the extension.
20972   if (isTruncate(ExtScale))
20973     return DAG.getBitcast(VT, N00);
20974
20975   return SDValue();
20976 }
20977
20978 // Combine shuffles of splat-shuffles of the form:
20979 // shuffle (shuffle V, undef, splat-mask), undef, M
20980 // If splat-mask contains undef elements, we need to be careful about
20981 // introducing undef's in the folded mask which are not the result of composing
20982 // the masks of the shuffles.
20983 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20984                                         SelectionDAG &DAG) {
20985   if (!Shuf->getOperand(1).isUndef())
20986     return SDValue();
20987   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20988   if (!Splat || !Splat->isSplat())
20989     return SDValue();
20990
20991   ArrayRef<int> ShufMask = Shuf->getMask();
20992   ArrayRef<int> SplatMask = Splat->getMask();
20993   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
20994
20995   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20996   // every undef mask element in the splat-shuffle has a corresponding undef
20997   // element in the user-shuffle's mask or if the composition of mask elements
20998   // would result in undef.
20999   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21000   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
21001   //   In this case it is not legal to simplify to the splat-shuffle because we
21002   //   may be exposing the users of the shuffle an undef element at index 1
21003   //   which was not there before the combine.
21004   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21005   //   In this case the composition of masks yields SplatMask, so it's ok to
21006   //   simplify to the splat-shuffle.
21007   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21008   //   In this case the composed mask includes all undef elements of SplatMask
21009   //   and in addition sets element zero to undef. It is safe to simplify to
21010   //   the splat-shuffle.
21011   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21012                                        ArrayRef<int> SplatMask) {
21013     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21014       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21015           SplatMask[UserMask[i]] != -1)
21016         return false;
21017     return true;
21018   };
21019   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21020     return Shuf->getOperand(0);
21021
21022   // Create a new shuffle with a mask that is composed of the two shuffles'
21023   // masks.
21024   SmallVector<int, 32> NewMask;
21025   for (int Idx : ShufMask)
21026     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21027
21028   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21029                               Splat->getOperand(0), Splat->getOperand(1),
21030                               NewMask);
21031 }
21032
21033 /// Combine shuffle of shuffle of the form:
21034 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
21035 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21036                                      SelectionDAG &DAG) {
21037   if (!OuterShuf->getOperand(1).isUndef())
21038     return SDValue();
21039   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21040   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21041     return SDValue();
21042
21043   ArrayRef<int> OuterMask = OuterShuf->getMask();
21044   ArrayRef<int> InnerMask = InnerShuf->getMask();
21045   unsigned NumElts = OuterMask.size();
21046   assert(NumElts == InnerMask.size() && "Mask length mismatch");
21047   SmallVector<int, 32> CombinedMask(NumElts, -1);
21048   int SplatIndex = -1;
21049   for (unsigned i = 0; i != NumElts; ++i) {
21050     // Undef lanes remain undef.
21051     int OuterMaskElt = OuterMask[i];
21052     if (OuterMaskElt == -1)
21053       continue;
21054
21055     // Peek through the shuffle masks to get the underlying source element.
21056     int InnerMaskElt = InnerMask[OuterMaskElt];
21057     if (InnerMaskElt == -1)
21058       continue;
21059
21060     // Initialize the splatted element.
21061     if (SplatIndex == -1)
21062       SplatIndex = InnerMaskElt;
21063
21064     // Non-matching index - this is not a splat.
21065     if (SplatIndex != InnerMaskElt)
21066       return SDValue();
21067
21068     CombinedMask[i] = InnerMaskElt;
21069   }
21070   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21071           getSplatIndex(CombinedMask) != -1) &&
21072          "Expected a splat mask");
21073
21074   // TODO: The transform may be a win even if the mask is not legal.
21075   EVT VT = OuterShuf->getValueType(0);
21076   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21077   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21078     return SDValue();
21079
21080   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21081                               InnerShuf->getOperand(1), CombinedMask);
21082 }
21083
21084 /// If the shuffle mask is taking exactly one element from the first vector
21085 /// operand and passing through all other elements from the second vector
21086 /// operand, return the index of the mask element that is choosing an element
21087 /// from the first operand. Otherwise, return -1.
21088 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21089   int MaskSize = Mask.size();
21090   int EltFromOp0 = -1;
21091   // TODO: This does not match if there are undef elements in the shuffle mask.
21092   // Should we ignore undefs in the shuffle mask instead? The trade-off is
21093   // removing an instruction (a shuffle), but losing the knowledge that some
21094   // vector lanes are not needed.
21095   for (int i = 0; i != MaskSize; ++i) {
21096     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21097       // We're looking for a shuffle of exactly one element from operand 0.
21098       if (EltFromOp0 != -1)
21099         return -1;
21100       EltFromOp0 = i;
21101     } else if (Mask[i] != i + MaskSize) {
21102       // Nothing from operand 1 can change lanes.
21103       return -1;
21104     }
21105   }
21106   return EltFromOp0;
21107 }
21108
21109 /// If a shuffle inserts exactly one element from a source vector operand into
21110 /// another vector operand and we can access the specified element as a scalar,
21111 /// then we can eliminate the shuffle.
21112 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21113                                       SelectionDAG &DAG) {
21114   // First, check if we are taking one element of a vector and shuffling that
21115   // element into another vector.
21116   ArrayRef<int> Mask = Shuf->getMask();
21117   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21118   SDValue Op0 = Shuf->getOperand(0);
21119   SDValue Op1 = Shuf->getOperand(1);
21120   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21121   if (ShufOp0Index == -1) {
21122     // Commute mask and check again.
21123     ShuffleVectorSDNode::commuteMask(CommutedMask);
21124     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21125     if (ShufOp0Index == -1)
21126       return SDValue();
21127     // Commute operands to match the commuted shuffle mask.
21128     std::swap(Op0, Op1);
21129     Mask = CommutedMask;
21130   }
21131
21132   // The shuffle inserts exactly one element from operand 0 into operand 1.
21133   // Now see if we can access that element as a scalar via a real insert element
21134   // instruction.
21135   // TODO: We can try harder to locate the element as a scalar. Examples: it
21136   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21137   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21138          "Shuffle mask value must be from operand 0");
21139   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21140     return SDValue();
21141
21142   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21143   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21144     return SDValue();
21145
21146   // There's an existing insertelement with constant insertion index, so we
21147   // don't need to check the legality/profitability of a replacement operation
21148   // that differs at most in the constant value. The target should be able to
21149   // lower any of those in a similar way. If not, legalization will expand this
21150   // to a scalar-to-vector plus shuffle.
21151   //
21152   // Note that the shuffle may move the scalar from the position that the insert
21153   // element used. Therefore, our new insert element occurs at the shuffle's
21154   // mask index value, not the insert's index value.
21155   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21156   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21157   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21158                      Op1, Op0.getOperand(1), NewInsIndex);
21159 }
21160
21161 /// If we have a unary shuffle of a shuffle, see if it can be folded away
21162 /// completely. This has the potential to lose undef knowledge because the first
21163 /// shuffle may not have an undef mask element where the second one does. So
21164 /// only call this after doing simplifications based on demanded elements.
21165 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
21166   // shuf (shuf0 X, Y, Mask0), undef, Mask
21167   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21168   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
21169     return SDValue();
21170
21171   ArrayRef<int> Mask = Shuf->getMask();
21172   ArrayRef<int> Mask0 = Shuf0->getMask();
21173   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
21174     // Ignore undef elements.
21175     if (Mask[i] == -1)
21176       continue;
21177     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
21178
21179     // Is the element of the shuffle operand chosen by this shuffle the same as
21180     // the element chosen by the shuffle operand itself?
21181     if (Mask0[Mask[i]] != Mask0[i])
21182       return SDValue();
21183   }
21184   // Every element of this shuffle is identical to the result of the previous
21185   // shuffle, so we can replace this value.
21186   return Shuf->getOperand(0);
21187 }
21188
21189 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
21190   EVT VT = N->getValueType(0);
21191   unsigned NumElts = VT.getVectorNumElements();
21192
21193   SDValue N0 = N->getOperand(0);
21194   SDValue N1 = N->getOperand(1);
21195
21196   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
21197
21198   // Canonicalize shuffle undef, undef -> undef
21199   if (N0.isUndef() && N1.isUndef())
21200     return DAG.getUNDEF(VT);
21201
21202   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21203
21204   // Canonicalize shuffle v, v -> v, undef
21205   if (N0 == N1) {
21206     SmallVector<int, 8> NewMask;
21207     for (unsigned i = 0; i != NumElts; ++i) {
21208       int Idx = SVN->getMaskElt(i);
21209       if (Idx >= (int)NumElts) Idx -= NumElts;
21210       NewMask.push_back(Idx);
21211     }
21212     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
21213   }
21214
21215   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
21216   if (N0.isUndef())
21217     return DAG.getCommutedVectorShuffle(*SVN);
21218
21219   // Remove references to rhs if it is undef
21220   if (N1.isUndef()) {
21221     bool Changed = false;
21222     SmallVector<int, 8> NewMask;
21223     for (unsigned i = 0; i != NumElts; ++i) {
21224       int Idx = SVN->getMaskElt(i);
21225       if (Idx >= (int)NumElts) {
21226         Idx = -1;
21227         Changed = true;
21228       }
21229       NewMask.push_back(Idx);
21230     }
21231     if (Changed)
21232       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
21233   }
21234
21235   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
21236     return InsElt;
21237
21238   // A shuffle of a single vector that is a splatted value can always be folded.
21239   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
21240     return V;
21241
21242   if (SDValue V = formSplatFromShuffles(SVN, DAG))
21243     return V;
21244
21245   // If it is a splat, check if the argument vector is another splat or a
21246   // build_vector.
21247   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
21248     int SplatIndex = SVN->getSplatIndex();
21249     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
21250         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
21251       // splat (vector_bo L, R), Index -->
21252       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
21253       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
21254       SDLoc DL(N);
21255       EVT EltVT = VT.getScalarType();
21256       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
21257       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
21258       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
21259       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
21260                                   N0.getNode()->getFlags());
21261       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
21262       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
21263       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
21264     }
21265
21266     // If this is a bit convert that changes the element type of the vector but
21267     // not the number of vector elements, look through it.  Be careful not to
21268     // look though conversions that change things like v4f32 to v2f64.
21269     SDNode *V = N0.getNode();
21270     if (V->getOpcode() == ISD::BITCAST) {
21271       SDValue ConvInput = V->getOperand(0);
21272       if (ConvInput.getValueType().isVector() &&
21273           ConvInput.getValueType().getVectorNumElements() == NumElts)
21274         V = ConvInput.getNode();
21275     }
21276
21277     if (V->getOpcode() == ISD::BUILD_VECTOR) {
21278       assert(V->getNumOperands() == NumElts &&
21279              "BUILD_VECTOR has wrong number of operands");
21280       SDValue Base;
21281       bool AllSame = true;
21282       for (unsigned i = 0; i != NumElts; ++i) {
21283         if (!V->getOperand(i).isUndef()) {
21284           Base = V->getOperand(i);
21285           break;
21286         }
21287       }
21288       // Splat of <u, u, u, u>, return <u, u, u, u>
21289       if (!Base.getNode())
21290         return N0;
21291       for (unsigned i = 0; i != NumElts; ++i) {
21292         if (V->getOperand(i) != Base) {
21293           AllSame = false;
21294           break;
21295         }
21296       }
21297       // Splat of <x, x, x, x>, return <x, x, x, x>
21298       if (AllSame)
21299         return N0;
21300
21301       // Canonicalize any other splat as a build_vector.
21302       SDValue Splatted = V->getOperand(SplatIndex);
21303       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21304       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21305
21306       // We may have jumped through bitcasts, so the type of the
21307       // BUILD_VECTOR may not match the type of the shuffle.
21308       if (V->getValueType(0) != VT)
21309         NewBV = DAG.getBitcast(VT, NewBV);
21310       return NewBV;
21311     }
21312   }
21313
21314   // Simplify source operands based on shuffle mask.
21315   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21316     return SDValue(N, 0);
21317
21318   // This is intentionally placed after demanded elements simplification because
21319   // it could eliminate knowledge of undef elements created by this shuffle.
21320   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21321     return ShufOp;
21322
21323   // Match shuffles that can be converted to any_vector_extend_in_reg.
21324   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21325     return V;
21326
21327   // Combine "truncate_vector_in_reg" style shuffles.
21328   if (SDValue V = combineTruncationShuffle(SVN, DAG))
21329     return V;
21330
21331   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21332       Level < AfterLegalizeVectorOps &&
21333       (N1.isUndef() ||
21334       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21335        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21336     if (SDValue V = partitionShuffleOfConcats(N, DAG))
21337       return V;
21338   }
21339
21340   // A shuffle of a concat of the same narrow vector can be reduced to use
21341   // only low-half elements of a concat with undef:
21342   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
21343   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21344       N0.getNumOperands() == 2 &&
21345       N0.getOperand(0) == N0.getOperand(1)) {
21346     int HalfNumElts = (int)NumElts / 2;
21347     SmallVector<int, 8> NewMask;
21348     for (unsigned i = 0; i != NumElts; ++i) {
21349       int Idx = SVN->getMaskElt(i);
21350       if (Idx >= HalfNumElts) {
21351         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21352         Idx -= HalfNumElts;
21353       }
21354       NewMask.push_back(Idx);
21355     }
21356     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21357       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21358       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21359                                    N0.getOperand(0), UndefVec);
21360       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21361     }
21362   }
21363
21364   // See if we can replace a shuffle with an insert_subvector.
21365   // e.g. v2i32 into v8i32:
21366   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
21367   // --> insert_subvector(lhs,rhs1,4).
21368   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
21369       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
21370     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
21371       // Ensure RHS subvectors are legal.
21372       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
21373       EVT SubVT = RHS.getOperand(0).getValueType();
21374       int NumSubVecs = RHS.getNumOperands();
21375       int NumSubElts = SubVT.getVectorNumElements();
21376       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
21377       if (!TLI.isTypeLegal(SubVT))
21378         return SDValue();
21379
21380       // Don't bother if we have an unary shuffle (matches undef + LHS elts).
21381       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
21382         return SDValue();
21383
21384       // Search [NumSubElts] spans for RHS sequence.
21385       // TODO: Can we avoid nested loops to increase performance?
21386       SmallVector<int> InsertionMask(NumElts);
21387       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
21388         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
21389           // Reset mask to identity.
21390           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
21391
21392           // Add subvector insertion.
21393           std::iota(InsertionMask.begin() + SubIdx,
21394                     InsertionMask.begin() + SubIdx + NumSubElts,
21395                     NumElts + (SubVec * NumSubElts));
21396
21397           // See if the shuffle mask matches the reference insertion mask.
21398           bool MatchingShuffle = true;
21399           for (int i = 0; i != (int)NumElts; ++i) {
21400             int ExpectIdx = InsertionMask[i];
21401             int ActualIdx = Mask[i];
21402             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
21403               MatchingShuffle = false;
21404               break;
21405             }
21406           }
21407
21408           if (MatchingShuffle)
21409             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
21410                                RHS.getOperand(SubVec),
21411                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
21412         }
21413       }
21414       return SDValue();
21415     };
21416     ArrayRef<int> Mask = SVN->getMask();
21417     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
21418       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
21419         return InsertN1;
21420     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
21421       SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
21422       ShuffleVectorSDNode::commuteMask(CommuteMask);
21423       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
21424         return InsertN0;
21425     }
21426   }
21427
21428   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21429   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21430   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21431     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21432       return Res;
21433
21434   // If this shuffle only has a single input that is a bitcasted shuffle,
21435   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21436   // back to their original types.
21437   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21438       N1.isUndef() && Level < AfterLegalizeVectorOps &&
21439       TLI.isTypeLegal(VT)) {
21440
21441     SDValue BC0 = peekThroughOneUseBitcasts(N0);
21442     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21443       EVT SVT = VT.getScalarType();
21444       EVT InnerVT = BC0->getValueType(0);
21445       EVT InnerSVT = InnerVT.getScalarType();
21446
21447       // Determine which shuffle works with the smaller scalar type.
21448       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21449       EVT ScaleSVT = ScaleVT.getScalarType();
21450
21451       if (TLI.isTypeLegal(ScaleVT) &&
21452           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21453           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21454         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21455         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21456
21457         // Scale the shuffle masks to the smaller scalar type.
21458         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21459         SmallVector<int, 8> InnerMask;
21460         SmallVector<int, 8> OuterMask;
21461         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21462         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21463
21464         // Merge the shuffle masks.
21465         SmallVector<int, 8> NewMask;
21466         for (int M : OuterMask)
21467           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21468
21469         // Test for shuffle mask legality over both commutations.
21470         SDValue SV0 = BC0->getOperand(0);
21471         SDValue SV1 = BC0->getOperand(1);
21472         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21473         if (!LegalMask) {
21474           std::swap(SV0, SV1);
21475           ShuffleVectorSDNode::commuteMask(NewMask);
21476           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21477         }
21478
21479         if (LegalMask) {
21480           SV0 = DAG.getBitcast(ScaleVT, SV0);
21481           SV1 = DAG.getBitcast(ScaleVT, SV1);
21482           return DAG.getBitcast(
21483               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21484         }
21485       }
21486     }
21487   }
21488
21489   // Compute the combined shuffle mask for a shuffle with SV0 as the first
21490   // operand, and SV1 as the second operand.
21491   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21492   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
21493   auto MergeInnerShuffle =
21494       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21495                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
21496                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21497                      SmallVectorImpl<int> &Mask) -> bool {
21498     // Don't try to fold splats; they're likely to simplify somehow, or they
21499     // might be free.
21500     if (OtherSVN->isSplat())
21501       return false;
21502
21503     SV0 = SV1 = SDValue();
21504     Mask.clear();
21505
21506     for (unsigned i = 0; i != NumElts; ++i) {
21507       int Idx = SVN->getMaskElt(i);
21508       if (Idx < 0) {
21509         // Propagate Undef.
21510         Mask.push_back(Idx);
21511         continue;
21512       }
21513
21514       if (Commute)
21515         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21516
21517       SDValue CurrentVec;
21518       if (Idx < (int)NumElts) {
21519         // This shuffle index refers to the inner shuffle N0. Lookup the inner
21520         // shuffle mask to identify which vector is actually referenced.
21521         Idx = OtherSVN->getMaskElt(Idx);
21522         if (Idx < 0) {
21523           // Propagate Undef.
21524           Mask.push_back(Idx);
21525           continue;
21526         }
21527         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21528                                           : OtherSVN->getOperand(1);
21529       } else {
21530         // This shuffle index references an element within N1.
21531         CurrentVec = N1;
21532       }
21533
21534       // Simple case where 'CurrentVec' is UNDEF.
21535       if (CurrentVec.isUndef()) {
21536         Mask.push_back(-1);
21537         continue;
21538       }
21539
21540       // Canonicalize the shuffle index. We don't know yet if CurrentVec
21541       // will be the first or second operand of the combined shuffle.
21542       Idx = Idx % NumElts;
21543       if (!SV0.getNode() || SV0 == CurrentVec) {
21544         // Ok. CurrentVec is the left hand side.
21545         // Update the mask accordingly.
21546         SV0 = CurrentVec;
21547         Mask.push_back(Idx);
21548         continue;
21549       }
21550       if (!SV1.getNode() || SV1 == CurrentVec) {
21551         // Ok. CurrentVec is the right hand side.
21552         // Update the mask accordingly.
21553         SV1 = CurrentVec;
21554         Mask.push_back(Idx + NumElts);
21555         continue;
21556       }
21557
21558       // Last chance - see if the vector is another shuffle and if it
21559       // uses one of the existing candidate shuffle ops.
21560       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21561         int InnerIdx = CurrentSVN->getMaskElt(Idx);
21562         if (InnerIdx < 0) {
21563           Mask.push_back(-1);
21564           continue;
21565         }
21566         SDValue InnerVec = (InnerIdx < (int)NumElts)
21567                                ? CurrentSVN->getOperand(0)
21568                                : CurrentSVN->getOperand(1);
21569         if (InnerVec.isUndef()) {
21570           Mask.push_back(-1);
21571           continue;
21572         }
21573         InnerIdx %= NumElts;
21574         if (InnerVec == SV0) {
21575           Mask.push_back(InnerIdx);
21576           continue;
21577         }
21578         if (InnerVec == SV1) {
21579           Mask.push_back(InnerIdx + NumElts);
21580           continue;
21581         }
21582       }
21583
21584       // Bail out if we cannot convert the shuffle pair into a single shuffle.
21585       return false;
21586     }
21587
21588     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21589       return true;
21590
21591     // Avoid introducing shuffles with illegal mask.
21592     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21593     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21594     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21595     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21596     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21597     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21598     if (TLI.isShuffleMaskLegal(Mask, VT))
21599       return true;
21600
21601     std::swap(SV0, SV1);
21602     ShuffleVectorSDNode::commuteMask(Mask);
21603     return TLI.isShuffleMaskLegal(Mask, VT);
21604   };
21605
21606   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21607     // Canonicalize shuffles according to rules:
21608     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21609     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21610     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21611     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21612         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21613       // The incoming shuffle must be of the same type as the result of the
21614       // current shuffle.
21615       assert(N1->getOperand(0).getValueType() == VT &&
21616              "Shuffle types don't match");
21617
21618       SDValue SV0 = N1->getOperand(0);
21619       SDValue SV1 = N1->getOperand(1);
21620       bool HasSameOp0 = N0 == SV0;
21621       bool IsSV1Undef = SV1.isUndef();
21622       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21623         // Commute the operands of this shuffle so merging below will trigger.
21624         return DAG.getCommutedVectorShuffle(*SVN);
21625     }
21626
21627     // Canonicalize splat shuffles to the RHS to improve merging below.
21628     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21629     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21630         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21631         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21632         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21633       return DAG.getCommutedVectorShuffle(*SVN);
21634     }
21635
21636     // Try to fold according to rules:
21637     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21638     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21639     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21640     // Don't try to fold shuffles with illegal type.
21641     // Only fold if this shuffle is the only user of the other shuffle.
21642     // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21643     for (int i = 0; i != 2; ++i) {
21644       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21645           N->isOnlyUserOf(N->getOperand(i).getNode())) {
21646         // The incoming shuffle must be of the same type as the result of the
21647         // current shuffle.
21648         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21649         assert(OtherSV->getOperand(0).getValueType() == VT &&
21650                "Shuffle types don't match");
21651
21652         SDValue SV0, SV1;
21653         SmallVector<int, 4> Mask;
21654         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21655                               SV0, SV1, Mask)) {
21656           // Check if all indices in Mask are Undef. In case, propagate Undef.
21657           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21658             return DAG.getUNDEF(VT);
21659
21660           return DAG.getVectorShuffle(VT, SDLoc(N),
21661                                       SV0 ? SV0 : DAG.getUNDEF(VT),
21662                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21663         }
21664       }
21665     }
21666
21667     // Merge shuffles through binops if we are able to merge them with at
21668     // least one other shuffle.
21669     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21670     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21671     unsigned SrcOpcode = N0.getOpcode();
21672     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21673         (N1.isUndef() ||
21674          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21675       // Get binop source ops, or just pass on the undef.
21676       SDValue Op00 = N0.getOperand(0);
21677       SDValue Op01 = N0.getOperand(1);
21678       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21679       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
21680       // TODO: We might be able to relax the VT check but we don't currently
21681       // have any isBinOp() that has different result/ops VTs so play safe until
21682       // we have test coverage.
21683       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21684           Op01.getValueType() == VT && Op11.getValueType() == VT &&
21685           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21686            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21687            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21688            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21689         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21690                                         SmallVectorImpl<int> &Mask, bool LeftOp,
21691                                         bool Commute) {
21692           SDValue InnerN = Commute ? N1 : N0;
21693           SDValue Op0 = LeftOp ? Op00 : Op01;
21694           SDValue Op1 = LeftOp ? Op10 : Op11;
21695           if (Commute)
21696             std::swap(Op0, Op1);
21697           // Only accept the merged shuffle if we don't introduce undef elements,
21698           // or the inner shuffle already contained undef elements.
21699           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21700           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21701                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21702                                    Mask) &&
21703                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21704                   llvm::none_of(Mask, [](int M) { return M < 0; }));
21705         };
21706
21707         // Ensure we don't increase the number of shuffles - we must merge a
21708         // shuffle from at least one of the LHS and RHS ops.
21709         bool MergedLeft = false;
21710         SDValue LeftSV0, LeftSV1;
21711         SmallVector<int, 4> LeftMask;
21712         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21713             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21714           MergedLeft = true;
21715         } else {
21716           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21717           LeftSV0 = Op00, LeftSV1 = Op10;
21718         }
21719
21720         bool MergedRight = false;
21721         SDValue RightSV0, RightSV1;
21722         SmallVector<int, 4> RightMask;
21723         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21724             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21725           MergedRight = true;
21726         } else {
21727           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21728           RightSV0 = Op01, RightSV1 = Op11;
21729         }
21730
21731         if (MergedLeft || MergedRight) {
21732           SDLoc DL(N);
21733           SDValue LHS = DAG.getVectorShuffle(
21734               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21735               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21736           SDValue RHS = DAG.getVectorShuffle(
21737               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21738               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21739           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21740         }
21741       }
21742     }
21743   }
21744
21745   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21746     return V;
21747
21748   return SDValue();
21749 }
21750
21751 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21752   EVT VT = N->getValueType(0);
21753   SDValue Scalar = N->getOperand(0);
21754
21755   // The only fold here rewrites SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V, C0))
21756   // as a VECTOR_SHUFFLE of V or an explicit truncate; fixed-length types only.
21757   if (Scalar.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21758       !VT.isFixedLengthVector() ||
21759       !Scalar->getOperand(0).getValueType().isFixedLengthVector())
21760     return SDValue();
21761
21762   SDValue SrcVec = Scalar->getOperand(0);
21763   EVT SrcVT = SrcVec.getValueType();
21764   auto *IdxC = dyn_cast<ConstantSDNode>(Scalar->getOperand(1));
21765   if (!IdxC) // The extraction index has to be a constant.
21766     return SDValue();
21767
21768   // Mask that moves the extracted lane into lane 0; all other lanes are undef.
21769   SmallVector<int, 8> ShufMask(SrcVT.getVectorNumElements(), -1);
21770   ShufMask[0] = static_cast<int>(IdxC->getZExtValue());
21771
21772   // Make an implicit integer truncate explicit when the narrow scalar type is
21773   // legal. Otherwise fall through to the shuffle fold, which needs equal types.
21774   EVT EltVT = VT.getScalarType();
21775   if (EltVT != Scalar.getValueType() &&
21776       Scalar.getValueType().isScalarInteger() && isTypeLegal(EltVT)) {
21777     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), EltVT, Scalar);
21778     return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Trunc);
21779   }
21780
21781   // The shuffle needs matching element types and a result with no more
21782   // elements than the source vector.
21783   if (EltVT != SrcVT.getScalarType() ||
21784       VT.getVectorNumElements() > SrcVT.getVectorNumElements())
21785     return SDValue();
21786
21787   SDValue Shuf = TLI.buildLegalVectorShuffle(
21788       SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), ShufMask, DAG);
21789   if (!Shuf)
21790     return SDValue();
21791   if (VT == SrcVT) // Same type: the shuffle itself is the result.
21792     return Shuf;
21793   if (VT.getVectorNumElements() == SrcVT.getVectorNumElements())
21794     return SDValue();
21795
21796   // Fewer result elements: keep only the leading subvector of the shuffle.
21797   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21798   EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
21799                                VT.getVectorNumElements());
21800   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Shuf, ZeroIdx);
21801 }
21802
21803 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21804   EVT VT = N->getValueType(0);
21805   SDValue N0 = N->getOperand(0); // Vector being inserted into.
21806   SDValue N1 = N->getOperand(1); // Subvector being inserted.
21807   SDValue N2 = N->getOperand(2); // Insertion index operand.
21808   uint64_t InsIdx = N->getConstantOperandVal(2);
21809   // Folds are tried in order; the first match wins, else SDValue() is returned.
21810   // If inserting an UNDEF, just return the original vector.
21811   if (N1.isUndef())
21812     return N0;
21813
21814   // If this is an insert of an extracted vector into an undef vector, we can
21815   // just use the input to the extract (same index, source already of type VT).
21816   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21817       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21818     return N1.getOperand(0);
21819
21820   // If we are inserting a bitcast of an extract (at the same index) into an
21821   // undef, and the extract source has the same element count and total size
21822   // as VT, just bitcast the extract source.
21823   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X N2)) N2 -> BITCAST X
21824   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21825       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21826       N1.getOperand(0).getOperand(1) == N2 &&
21827       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21828           VT.getVectorElementCount() &&
21829       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21830           VT.getSizeInBits()) {
21831     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21832   }
21833
21834   // If both N0 and N1 are bitcast values on which insert_subvector
21835   // would make sense (same element type, same element count as VT), pull the
21836   // bitcast through, i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21837   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21838   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21839     SDValue CN0 = N0.getOperand(0);
21840     SDValue CN1 = N1.getOperand(0);
21841     EVT CN0VT = CN0.getValueType();
21842     EVT CN1VT = CN1.getValueType();
21843     if (CN0VT.isVector() && CN1VT.isVector() &&
21844         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21845         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21846       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21847                                       CN0.getValueType(), CN0, CN1, N2);
21848       return DAG.getBitcast(VT, NewINSERT);
21849     }
21850   }
21851
21852   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21853   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21854   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ), when SubOld/SubNew types match.
21855   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21856       N0.getOperand(1).getValueType() == N1.getValueType() &&
21857       N0.getOperand(2) == N2)
21858     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21859                        N1, N2);
21860
21861   // Eliminate an intermediate insert into an undef vector:
21862   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21863   // insert_subvector undef, X, N2
21864   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21865       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21866     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21867                        N1.getOperand(1), N2);
21868
21869   // Push subvector bitcasts to the output, adjusting the index as we go.
21870   // insert_subvector(bitcast(v), bitcast(s), c1)
21871   // -> bitcast(insert_subvector(v, s, c2)); v may also be undef.
21872   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21873       N1.getOpcode() == ISD::BITCAST) {
21874     SDValue N0Src = peekThroughBitcasts(N0);
21875     SDValue N1Src = peekThroughBitcasts(N1);
21876     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21877     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21878     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21879         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21880       EVT NewVT;
21881       SDLoc DL(N);
21882       SDValue NewIdx; // Stays empty when no valid rescaled index exists.
21883       LLVMContext &Ctx = *DAG.getContext();
21884       ElementCount NumElts = VT.getVectorElementCount();
21885       unsigned EltSizeInBits = VT.getScalarSizeInBits();
21886       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21887         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21888         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21889         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21890       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21891         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21892         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21893           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21894                                    NumElts.divideCoefficientBy(Scale));
21895           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21896         }
21897       }
21898       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21899         SDValue Res = DAG.getBitcast(NewVT, N0Src);
21900         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21901         return DAG.getBitcast(VT, Res);
21902       }
21903     }
21904   }
21905
21906   // Canonicalize insert_subvector dag nodes so the smaller index is innermost.
21907   // Example (applied when Idx1 < Idx0 and both subvectors have the same type):
21908   // (insert_subvector (insert_subvector A, Idx0), Idx1)
21909   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
21910   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21911       N1.getValueType() == N0.getOperand(1).getValueType()) {
21912     unsigned OtherIdx = N0.getConstantOperandVal(2);
21913     if (InsIdx < OtherIdx) {
21914       // Swap nodes: insert N1 first, then re-insert N0's subvector on top.
21915       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21916                                   N0.getOperand(0), N1, N2);
21917       AddToWorklist(NewOp.getNode());
21918       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21919                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21920     }
21921   }
21922
21923   // If the input vector is a concatenation, and the insert replaces
21924   // one of the pieces, we can optimize into a single concat_vectors.
21925   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21926       N0.getOperand(0).getValueType() == N1.getValueType() &&
21927       N0.getOperand(0).getValueType().isScalableVector() ==
21928           N1.getValueType().isScalableVector()) {
21929     unsigned Factor = N1.getValueType().getVectorMinNumElements();
21930     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21931     Ops[InsIdx / Factor] = N1; // Piece number = index / elements per piece.
21932     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21933   }
21934
21935   // Simplify source operands based on insertion; N itself is the result then.
21936   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21937     return SDValue(N, 0);
21938
21939   return SDValue();
21940 }
21941
21942 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21943   // The only fold here is (fp_to_fp16 (fp16_to_fp X)) --> X, which hands
21944   // back the inner conversion's own operand.
21945   SDValue Src = N->getOperand(0);
21946   if (Src->getOpcode() != ISD::FP16_TO_FP)
21947     return SDValue();
21948
21949   return Src->getOperand(0);
21950 }
21951
21952 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21953   // fp16_to_fp (and X, 0xffff) --> fp16_to_fp X, skipped when
21954   // TLI.shouldKeepZExtForFP16Conv() is set. The mask must be non-opaque.
21955   SDValue Src = N->getOperand(0);
21956   if (TLI.shouldKeepZExtForFP16Conv() || Src->getOpcode() != ISD::AND)
21957     return SDValue();
21958
21959   ConstantSDNode *MaskC = getAsNonOpaqueConstant(Src.getOperand(1));
21960   if (!MaskC || MaskC->getAPIntValue() != 0xffff)
21961     return SDValue();
21962
21963   return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21964                      Src.getOperand(0));
21965 }
21966
21967 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21968   SDValue Vec = N->getOperand(0);
21969   EVT VecVT = Vec.getValueType();
21970   EVT ResVT = N->getValueType(0);
21971
21972   // VECREDUCE of a one-element vector is an extract, any-extended if needed.
21973   if (VecVT.getVectorElementCount().isScalar()) {
21974     SDLoc DL(N);
21975     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
21976                               VecVT.getVectorElementType(), Vec,
21977                               DAG.getVectorIdxConstant(0, DL));
21978     if (Elt.getValueType() == ResVT)
21979       return Elt;
21980     return DAG.getNode(ISD::ANY_EXTEND, DL, ResVT, Elt);
21981   }
21982
21983   // For a boolean vector (every bit of a lane is a sign bit) an and/or
21984   // reduction equals a umin/umax one; use it when only the latter is legal.
21985   unsigned Opcode = N->getOpcode();
21986   if (Opcode != ISD::VECREDUCE_AND && Opcode != ISD::VECREDUCE_OR)
21987     return SDValue();
21988   unsigned AltOpcode = Opcode == ISD::VECREDUCE_AND ? ISD::VECREDUCE_UMIN
21989                                                     : ISD::VECREDUCE_UMAX;
21990   if (TLI.isOperationLegalOrCustom(Opcode, VecVT) ||
21991       !TLI.isOperationLegalOrCustom(AltOpcode, VecVT) ||
21992       DAG.ComputeNumSignBits(Vec) != VecVT.getScalarSizeInBits())
21993     return SDValue();
21994   return DAG.getNode(AltOpcode, SDLoc(N), ResVT, Vec);
21995 }
21996
21997 /// Returns a vector_shuffle if it is able to transform an AND with a constant
21998 /// mask into a vector_shuffle of the other operand and a zero vector.
21999 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22000 ///      vector_shuffle V, Zero, <0, 5, 2, 7> (cleared lane i reads Zero[i])
22001 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22002   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22003   // An empty SDValue is returned whenever the transform does not apply.
22004   EVT VT = N->getValueType(0);
22005   SDValue LHS = N->getOperand(0);
22006   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22007   SDLoc DL(N);
22008
22009   // Make sure we're not running after operation legalization where it
22010   // may have custom lowered the vector shuffles.
22011   if (LegalOperations)
22012     return SDValue();
22013   // The mask operand (looked at through bitcasts) must be a BUILD_VECTOR.
22014   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22015     return SDValue();
22016
22017   EVT RVT = RHS.getValueType();
22018   unsigned NumElts = RHS.getNumOperands();
22019
22020   // Attempt to create a valid clear mask, splitting each mask element into
22021   // 'Split' sub elements and checking to see if each is
22022   // all zeros or all ones - suitable for shuffle masking.
22023   auto BuildClearMask = [&](int Split) {
22024     int NumSubElts = NumElts * Split;
22025     int NumSubBits = RVT.getScalarSizeInBits() / Split;
22026     // Indices[i] is i to keep sub element i of LHS, i + NumSubElts to zero it.
22027     SmallVector<int, 8> Indices;
22028     for (int i = 0; i != NumSubElts; ++i) {
22029       int EltIdx = i / Split;
22030       int SubIdx = i % Split;
22031       SDValue Elt = RHS.getOperand(EltIdx);
22032       // X & undef --> 0 (not undef). So this lane must be converted to choose
22033       // from the zero constant vector (same as if the element had all 0-bits).
22034       if (Elt.isUndef()) {
22035         Indices.push_back(i + NumSubElts);
22036         continue;
22037       }
22038       // Any other element must be an integer or FP constant; read its raw bits.
22039       APInt Bits;
22040       if (isa<ConstantSDNode>(Elt))
22041         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22042       else if (isa<ConstantFPSDNode>(Elt))
22043         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22044       else
22045         return SDValue();
22046
22047       // Extract sub element SubIdx from the constant (highest bits first on BE).
22048       if (DAG.getDataLayout().isBigEndian())
22049         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22050       else
22051         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22052       // All-ones keeps the LHS lane, all-zeros clears it, anything else fails.
22053       if (Bits.isAllOnesValue())
22054         Indices.push_back(i);
22055       else if (Bits == 0)
22056         Indices.push_back(i + NumSubElts);
22057       else
22058         return SDValue();
22059     }
22060
22061     // Let's see if the target supports this vector_shuffle.
22062     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22063     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22064     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22065       return SDValue();
22066     // Shuffle LHS (bitcast to ClearVT) against zero and cast back to VT.
22067     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22068     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22069                                                    DAG.getBitcast(ClearVT, LHS),
22070                                                    Zero, Indices));
22071   };
22072
22073   // Determine maximum split level (byte level masking).
22074   int MaxSplit = 1;
22075   if (RVT.getScalarSizeInBits() % 8 == 0)
22076     MaxSplit = RVT.getScalarSizeInBits() / 8;
22077   // Try splits in increasing order and return the first successful shuffle.
22078   for (int Split = 1; Split <= MaxSplit; ++Split)
22079     if (RVT.getScalarSizeInBits() % Split == 0)
22080       if (SDValue S = BuildClearMask(Split))
22081         return S;
22082
22083   return SDValue();
22084 }
22085
22086 /// When both operands of a vector binop splat the same lane, try to perform
22087 /// the operation once on the extracted scalars and rebuild the vector result.
22088 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
22089   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22090   SDValue LHS = N->getOperand(0);
22091   SDValue RHS = N->getOperand(1);
22092   EVT VT = N->getValueType(0);
22093   EVT ScalarVT = VT.getVectorElementType();
22094   unsigned BinOpc = N->getOpcode();
22095
22096   // Both sides must splat the same lane of sources with VT's element type.
22097   int LaneL, LaneR;
22098   SDValue SplatL = DAG.getSplatSourceVector(LHS, LaneL);
22099   SDValue SplatR = DAG.getSplatSourceVector(RHS, LaneR);
22100   if (!SplatL || !SplatR || LaneL != LaneR ||
22101       SplatL.getValueType().getVectorElementType() != ScalarVT ||
22102       SplatR.getValueType().getVectorElementType() != ScalarVT)
22103     return SDValue();
22104
22105   // TODO: Remove/replace the extract cost check? If the elements are available
22106   //       as scalars, then there may be no extract cost. Should we ask if
22107   //       inserting a scalar back into a vector is cheap instead?
22108   if (!TLI.isExtractVecEltCheap(VT, LaneL) ||
22109       !TLI.isOperationLegalOrCustom(BinOpc, ScalarVT))
22110     return SDValue();
22111
22112   SDLoc DL(N);
22113   SDValue Lane = DAG.getVectorIdxConstant(LaneL, DL);
22114   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SplatL, Lane);
22115   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SplatR, Lane);
22116   SDValue Scalar = DAG.getNode(BinOpc, DL, ScalarVT, X, Y, N->getFlags());
22117
22118   // build_vectors with one defined lane need just that lane, not a splat.
22119   // TODO: Keep track of undefs and use that info in the general case.
22120   auto IsDefined = [](SDValue V) { return !V.isUndef(); };
22121   bool OneLaneOnly = LHS.getOpcode() == ISD::BUILD_VECTOR &&
22122                      LHS.getOpcode() == RHS.getOpcode() &&
22123                      count_if(LHS->ops(), IsDefined) == 1 &&
22124                      count_if(RHS->ops(), IsDefined) == 1;
22125   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(),
22126                               OneLaneOnly ? DAG.getUNDEF(ScalarVT) : Scalar);
22127   if (OneLaneOnly)
22128     Ops[LaneL] = Scalar;
22129   return DAG.getBuildVector(VT, DL, Ops);
22130 }
22131
22132 /// Visit a binary vector operation, like ADD.
22133 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
22134   assert(N->getValueType(0).isVector() &&
22135          "SimplifyVBinOp only works on vectors!");
22136
22137   SDValue LHS = N->getOperand(0);
22138   SDValue RHS = N->getOperand(1);
22139   SDValue Ops[] = {LHS, RHS};
22140   EVT VT = N->getValueType(0);
22141   unsigned Opcode = N->getOpcode();
22142   SDNodeFlags Flags = N->getFlags();
22143
22144   // See if we can constant fold the vector operation.
22145   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
22146           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
22147     return Fold;
22148
22149   // Move unary shuffles with identical masks after a vector binop:
22150   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
22151   //   --> shuffle (VBinOp A, B), Undef, Mask
22152   // This does not require type legality checks because we are creating the
22153   // same types of operations that are in the original sequence. We do have to
22154   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
22155   // though. This code is adapted from the identical transform in instcombine.
22156   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
22157       Opcode != ISD::UREM && Opcode != ISD::SREM &&
22158       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
22159     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
22160     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
22161     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
22162         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
22163         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
22164       SDLoc DL(N);
22165       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
22166                                      RHS.getOperand(0), Flags);
22167       SDValue UndefV = LHS.getOperand(1);
22168       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
22169     }
22170
22171     // Try to sink a splat shuffle after a binop with a uniform constant.
22172     // This is limited to cases where neither the shuffle nor the constant have
22173     // undefined elements because that could be poison-unsafe or inhibit
22174     // demanded elements analysis. It is further limited to not change a splat
22175     // of an inserted scalar because that may be optimized better by
22176     // load-folding or other target-specific behaviors.
22177     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
22178         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
22179         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22180       // binop (splat X), (splat C) --> splat (binop X, C)
22181       SDLoc DL(N);
22182       SDValue X = Shuf0->getOperand(0);
22183       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
22184       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22185                                   Shuf0->getMask());
22186     }
22187     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
22188         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
22189         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
22190       // binop (splat C), (splat X) --> splat (binop C, X)
22191       SDLoc DL(N);
22192       SDValue X = Shuf1->getOperand(0);
22193       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
22194       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
22195                                   Shuf1->getMask());
22196     }
22197   }
22198
22199   // The following pattern is likely to emerge with vector reduction ops. Moving
22200   // the binary operation ahead of insertion may allow using a narrower vector
22201   // instruction that has better performance than the wide version of the op:
22202   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
22203   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
22204       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
22205       LHS.getOperand(2) == RHS.getOperand(2) &&
22206       (LHS.hasOneUse() || RHS.hasOneUse())) {
22207     SDValue X = LHS.getOperand(1);
22208     SDValue Y = RHS.getOperand(1);
22209     SDValue Z = LHS.getOperand(2);
22210     EVT NarrowVT = X.getValueType();
22211     if (NarrowVT == Y.getValueType() &&
22212         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
22213                                               LegalOperations)) {
22214       // (binop undef, undef) may not return undef, so compute that result.
22215       SDLoc DL(N);
22216       SDValue VecC =
22217           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
22218       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
22219       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
22220     }
22221   }
22222
22223   // Make sure all but the first op are undef or constant.
22224   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
22225     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
22226            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
22227              return Op.isUndef() ||
22228                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
22229            });
22230   };
22231
22232   // The following pattern is likely to emerge with vector reduction ops. Moving
22233   // the binary operation ahead of the concat may allow using a narrower vector
22234   // instruction that has better performance than the wide version of the op:
22235   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
22236   //   concat (VBinOp X, Y), VecC
22237   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
22238       (LHS.hasOneUse() || RHS.hasOneUse())) {
22239     EVT NarrowVT = LHS.getOperand(0).getValueType();
22240     if (NarrowVT == RHS.getOperand(0).getValueType() &&
22241         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
22242       SDLoc DL(N);
22243       unsigned NumOperands = LHS.getNumOperands();
22244       SmallVector<SDValue, 4> ConcatOps;
22245       for (unsigned i = 0; i != NumOperands; ++i) {
22246         // This constant fold for operands 1 and up.
22247         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
22248                                         RHS.getOperand(i)));
22249       }
22250
22251       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22252     }
22253   }
22254
22255   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
22256     return V;
22257
22258   return SDValue();
22259 }
22260
22261 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
22262                                     SDValue N2) {
22263   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
22264
22265   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
22266                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
22267
22268   // If we got a simplified select_cc node back from SimplifySelectCC, then
22269   // break it down into a new SETCC node, and a new SELECT node, and then return
22270   // the SELECT node, since we were called with a SELECT node.
22271   if (SCC.getNode()) {
22272     // Check to see if we got a select_cc back (to turn into setcc/select).
22273     // Otherwise, just return whatever node we got back, like fabs.
22274     if (SCC.getOpcode() == ISD::SELECT_CC) {
22275       const SDNodeFlags Flags = N0.getNode()->getFlags();
22276       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
22277                                   N0.getValueType(),
22278                                   SCC.getOperand(0), SCC.getOperand(1),
22279                                   SCC.getOperand(4), Flags);
22280       AddToWorklist(SETCC.getNode());
22281       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
22282                                          SCC.getOperand(2), SCC.getOperand(3));
22283       SelectNode->setFlags(Flags);
22284       return SelectNode;
22285     }
22286
22287     return SCC;
22288   }
22289   return SDValue();
22290 }
22291
22292 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
22293 /// being selected between, see if we can simplify the select.  Callers of this
22294 /// should assume that TheSelect is deleted if this returns true.  As such, they
22295 /// should return the appropriate thing (e.g. the node) back to the top-level of
22296 /// the DAG combiner loop to avoid it being looked at.
22297 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
22298                                     SDValue RHS) {
22299   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22300   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
22301   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
22302     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
22303       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
22304       SDValue Sqrt = RHS;
22305       ISD::CondCode CC;
22306       SDValue CmpLHS;
22307       const ConstantFPSDNode *Zero = nullptr;
22308
22309       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
22310         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
22311         CmpLHS = TheSelect->getOperand(0);
22312         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
22313       } else {
22314         // SELECT or VSELECT
22315         SDValue Cmp = TheSelect->getOperand(0);
22316         if (Cmp.getOpcode() == ISD::SETCC) {
22317           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
22318           CmpLHS = Cmp.getOperand(0);
22319           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
22320         }
22321       }
22322       if (Zero && Zero->isZero() &&
22323           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
22324           CC == ISD::SETULT || CC == ISD::SETLT)) {
22325         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
22326         CombineTo(TheSelect, Sqrt);
22327         return true;
22328       }
22329     }
22330   }
22331   // Cannot simplify select with vector condition
22332   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
22333
22334   // If this is a select from two identical things, try to pull the operation
22335   // through the select.
22336   if (LHS.getOpcode() != RHS.getOpcode() ||
22337       !LHS.hasOneUse() || !RHS.hasOneUse())
22338     return false;
22339
22340   // If this is a load and the token chain is identical, replace the select
22341   // of two loads with a load through a select of the address to load from.
22342   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
22343   // constants have been dropped into the constant pool.
22344   if (LHS.getOpcode() == ISD::LOAD) {
22345     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
22346     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
22347
22348     // Token chains must be identical.
22349     if (LHS.getOperand(0) != RHS.getOperand(0) ||
22350         // Do not let this transformation reduce the number of volatile loads.
22351         // Be conservative for atomics for the moment
22352         // TODO: This does appear to be legal for unordered atomics (see D66309)
22353         !LLD->isSimple() || !RLD->isSimple() ||
22354         // FIXME: If either is a pre/post inc/dec load,
22355         // we'd need to split out the address adjustment.
22356         LLD->isIndexed() || RLD->isIndexed() ||
22357         // If this is an EXTLOAD, the VT's must match.
22358         LLD->getMemoryVT() != RLD->getMemoryVT() ||
22359         // If this is an EXTLOAD, the kind of extension must match.
22360         (LLD->getExtensionType() != RLD->getExtensionType() &&
22361          // The only exception is if one of the extensions is anyext.
22362          LLD->getExtensionType() != ISD::EXTLOAD &&
22363          RLD->getExtensionType() != ISD::EXTLOAD) ||
22364         // FIXME: this discards src value information.  This is
22365         // over-conservative. It would be beneficial to be able to remember
22366         // both potential memory locations.  Since we are discarding
22367         // src value info, don't do the transformation if the memory
22368         // locations are not in the default address space.
22369         LLD->getPointerInfo().getAddrSpace() != 0 ||
22370         RLD->getPointerInfo().getAddrSpace() != 0 ||
22371         // We can't produce a CMOV of a TargetFrameIndex since we won't
22372         // generate the address generation required.
22373         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22374         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22375         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22376                                       LLD->getBasePtr().getValueType()))
22377       return false;
22378
22379     // The loads must not depend on one another.
22380     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22381       return false;
22382
22383     // Check that the select condition doesn't reach either load.  If so,
22384     // folding this will induce a cycle into the DAG.  If not, this is safe to
22385     // xform, so create a select of the addresses.
22386
22387     SmallPtrSet<const SDNode *, 32> Visited;
22388     SmallVector<const SDNode *, 16> Worklist;
22389
22390     // Always fail if LLD and RLD are not independent. TheSelect is a
22391     // predecessor to all Nodes in question so we need not search past it.
22392
22393     Visited.insert(TheSelect);
22394     Worklist.push_back(LLD);
22395     Worklist.push_back(RLD);
22396
22397     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22398         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22399       return false;
22400
22401     SDValue Addr;
22402     if (TheSelect->getOpcode() == ISD::SELECT) {
22403       // We cannot do this optimization if any pair of {RLD, LLD} is a
22404       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22405       // Loads, we only need to check if CondNode is a successor to one of the
22406       // loads. We can further avoid this if there's no use of their chain
22407       // value.
22408       SDNode *CondNode = TheSelect->getOperand(0).getNode();
22409       Worklist.push_back(CondNode);
22410
22411       if ((LLD->hasAnyUseOfValue(1) &&
22412            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22413           (RLD->hasAnyUseOfValue(1) &&
22414            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22415         return false;
22416
22417       Addr = DAG.getSelect(SDLoc(TheSelect),
22418                            LLD->getBasePtr().getValueType(),
22419                            TheSelect->getOperand(0), LLD->getBasePtr(),
22420                            RLD->getBasePtr());
22421     } else {  // Otherwise SELECT_CC
22422       // We cannot do this optimization if any pair of {RLD, LLD} is a
22423       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22424       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22425       // one of the loads. We can further avoid this if there's no use of their
22426       // chain value.
22427
22428       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22429       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22430       Worklist.push_back(CondLHS);
22431       Worklist.push_back(CondRHS);
22432
22433       if ((LLD->hasAnyUseOfValue(1) &&
22434            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22435           (RLD->hasAnyUseOfValue(1) &&
22436            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22437         return false;
22438
22439       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22440                          LLD->getBasePtr().getValueType(),
22441                          TheSelect->getOperand(0),
22442                          TheSelect->getOperand(1),
22443                          LLD->getBasePtr(), RLD->getBasePtr(),
22444                          TheSelect->getOperand(4));
22445     }
22446
22447     SDValue Load;
22448     // It is safe to replace the two loads if they have different alignments,
22449     // but the new load must be the minimum (most restrictive) alignment of the
22450     // inputs.
22451     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22452     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22453     if (!RLD->isInvariant())
22454       MMOFlags &= ~MachineMemOperand::MOInvariant;
22455     if (!RLD->isDereferenceable())
22456       MMOFlags &= ~MachineMemOperand::MODereferenceable;
22457     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22458       // FIXME: Discards pointer and AA info.
22459       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22460                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22461                          MMOFlags);
22462     } else {
22463       // FIXME: Discards pointer and AA info.
22464       Load = DAG.getExtLoad(
22465           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22466                                                   : LLD->getExtensionType(),
22467           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22468           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22469     }
22470
22471     // Users of the select now use the result of the load.
22472     CombineTo(TheSelect, Load);
22473
22474     // Users of the old loads now use the new load's chain.  We know the
22475     // old-load value is dead now.
22476     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22477     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22478     return true;
22479   }
22480
22481   return false;
22482 }
22483
22484 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22485 /// bitwise 'and'.
22486 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22487                                             SDValue N1, SDValue N2, SDValue N3,
22488                                             ISD::CondCode CC) {
22489   // If this is a select where the false operand is zero and the compare is a
22490   // check of the sign bit, see if we can perform the "gzip trick":
22491   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22492   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22493   EVT XType = N0.getValueType();
22494   EVT AType = N2.getValueType();
22495   if (!isNullConstant(N3) || !XType.bitsGE(AType))
22496     return SDValue();
22497
22498   // If the comparison is testing for a positive value, we have to invert
22499   // the sign bit mask, so only do that transform if the target has a bitwise
22500   // 'and not' instruction (the invert is free).
22501   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22502     // (X > -1) ? A : 0
22503     // (X >  0) ? X : 0 <-- This is canonical signed max.
22504     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22505       return SDValue();
22506   } else if (CC == ISD::SETLT) {
22507     // (X <  0) ? A : 0
22508     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
22509     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22510       return SDValue();
22511   } else {
22512     return SDValue();
22513   }
22514
22515   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22516   // constant.
22517   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22518   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22519   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22520     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22521     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22522       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22523       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22524       AddToWorklist(Shift.getNode());
22525
22526       if (XType.bitsGT(AType)) {
22527         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22528         AddToWorklist(Shift.getNode());
22529       }
22530
22531       if (CC == ISD::SETGT)
22532         Shift = DAG.getNOT(DL, Shift, AType);
22533
22534       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22535     }
22536   }
22537
22538   unsigned ShCt = XType.getSizeInBits() - 1;
22539   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22540     return SDValue();
22541
22542   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22543   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22544   AddToWorklist(Shift.getNode());
22545
22546   if (XType.bitsGT(AType)) {
22547     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22548     AddToWorklist(Shift.getNode());
22549   }
22550
22551   if (CC == ISD::SETGT)
22552     Shift = DAG.getNOT(DL, Shift, AType);
22553
22554   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22555 }
22556
22557 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
22558 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
22559   SDValue N0 = N->getOperand(0);
22560   SDValue N1 = N->getOperand(1);
22561   SDValue N2 = N->getOperand(2);
22562   EVT VT = N->getValueType(0);
22563   SDLoc DL(N);
22564
22565   unsigned BinOpc = N1.getOpcode();
22566   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
22567     return SDValue();
22568
22569   if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
22570     return SDValue();
22571
22572   // Fold select(cond, binop(x, y), binop(z, y))
22573   //  --> binop(select(cond, x, z), y)
22574   if (N1.getOperand(1) == N2.getOperand(1)) {
22575     SDValue NewSel =
22576         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
22577     SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
22578     NewBinOp->setFlags(N1->getFlags());
22579     NewBinOp->intersectFlagsWith(N2->getFlags());
22580     return NewBinOp;
22581   }
22582
22583   // Fold select(cond, binop(x, y), binop(x, z))
22584   //  --> binop(x, select(cond, y, z))
22585   // Second op VT might be different (e.g. shift amount type)
22586   if (N1.getOperand(0) == N2.getOperand(0) &&
22587       VT == N1.getOperand(1).getValueType() &&
22588       VT == N2.getOperand(1).getValueType()) {
22589     SDValue NewSel =
22590         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
22591     SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
22592     NewBinOp->setFlags(N1->getFlags());
22593     NewBinOp->intersectFlagsWith(N2->getFlags());
22594     return NewBinOp;
22595   }
22596
22597   // TODO: Handle isCommutativeBinOp patterns as well?
22598   return SDValue();
22599 }
22600
22601 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22602 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22603   SDValue N0 = N->getOperand(0);
22604   EVT VT = N->getValueType(0);
22605   bool IsFabs = N->getOpcode() == ISD::FABS;
22606   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22607
22608   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22609     return SDValue();
22610
22611   SDValue Int = N0.getOperand(0);
22612   EVT IntVT = Int.getValueType();
22613
22614   // The operand to cast should be integer.
22615   if (!IntVT.isInteger() || IntVT.isVector())
22616     return SDValue();
22617
22618   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22619   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
22620   APInt SignMask;
22621   if (N0.getValueType().isVector()) {
22622     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22623     // 0x7f...) per element and splat it.
22624     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22625     if (IsFabs)
22626       SignMask = ~SignMask;
22627     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22628   } else {
22629     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22630     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22631     if (IsFabs)
22632       SignMask = ~SignMask;
22633   }
22634   SDLoc DL(N0);
22635   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22636                     DAG.getConstant(SignMask, DL, IntVT));
22637   AddToWorklist(Int.getNode());
22638   return DAG.getBitcast(VT, Int);
22639 }
22640
22641 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
22642 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22643 /// in it. This may be a win when the constant is not otherwise available
22644 /// because it replaces two constant pool loads with one.
22645 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22646     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22647     ISD::CondCode CC) {
22648   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22649     return SDValue();
22650
22651   // If we are before legalize types, we want the other legalization to happen
22652   // first (for example, to avoid messing with soft float).
22653   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22654   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22655   EVT VT = N2.getValueType();
22656   if (!TV || !FV || !TLI.isTypeLegal(VT))
22657     return SDValue();
22658
22659   // If a constant can be materialized without loads, this does not make sense.
22660   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22661       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22662       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22663     return SDValue();
22664
22665   // If both constants have multiple uses, then we won't need to do an extra
22666   // load. The values are likely around in registers for other users.
22667   if (!TV->hasOneUse() && !FV->hasOneUse())
22668     return SDValue();
22669
22670   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22671                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22672   Type *FPTy = Elts[0]->getType();
22673   const DataLayout &TD = DAG.getDataLayout();
22674
22675   // Create a ConstantArray of the two constants.
22676   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22677   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22678                                       TD.getPrefTypeAlign(FPTy));
22679   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22680
22681   // Get offsets to the 0 and 1 elements of the array, so we can select between
22682   // them.
22683   SDValue Zero = DAG.getIntPtrConstant(0, DL);
22684   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22685   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22686   SDValue Cond =
22687       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22688   AddToWorklist(Cond.getNode());
22689   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22690   AddToWorklist(CstOffset.getNode());
22691   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22692   AddToWorklist(CPIdx.getNode());
22693   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22694                      MachinePointerInfo::getConstantPool(
22695                          DAG.getMachineFunction()), Alignment);
22696 }
22697
22698 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22699 /// where 'cond' is the comparison specified by CC.
22700 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22701                                       SDValue N2, SDValue N3, ISD::CondCode CC,
22702                                       bool NotExtCompare) {
22703   // (x ? y : y) -> y.
22704   if (N2 == N3) return N2;
22705
22706   EVT CmpOpVT = N0.getValueType();
22707   EVT CmpResVT = getSetCCResultType(CmpOpVT);
22708   EVT VT = N2.getValueType();
22709   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22710   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22711   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22712
22713   // Determine if the condition we're dealing with is constant.
22714   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22715     AddToWorklist(SCC.getNode());
22716     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22717       // fold select_cc true, x, y -> x
22718       // fold select_cc false, x, y -> y
22719       return !(SCCC->isNullValue()) ? N2 : N3;
22720     }
22721   }
22722
22723   if (SDValue V =
22724           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22725     return V;
22726
22727   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22728     return V;
22729
22730   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
22731   // where y is has a single bit set.
22732   // A plaintext description would be, we can turn the SELECT_CC into an AND
22733   // when the condition can be materialized as an all-ones register.  Any
22734   // single bit-test can be materialized as an all-ones register with
22735   // shift-left and shift-right-arith.
22736   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22737       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22738     SDValue AndLHS = N0->getOperand(0);
22739     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22740     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22741       // Shift the tested bit over the sign bit.
22742       const APInt &AndMask = ConstAndRHS->getAPIntValue();
22743       unsigned ShCt = AndMask.getBitWidth() - 1;
22744       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22745         SDValue ShlAmt =
22746           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22747                           getShiftAmountTy(AndLHS.getValueType()));
22748         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22749
22750         // Now arithmetic right shift it all the way over, so the result is
22751         // either all-ones, or zero.
22752         SDValue ShrAmt =
22753           DAG.getConstant(ShCt, SDLoc(Shl),
22754                           getShiftAmountTy(Shl.getValueType()));
22755         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22756
22757         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22758       }
22759     }
22760   }
22761
22762   // fold select C, 16, 0 -> shl C, 4
22763   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22764   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22765
22766   if ((Fold || Swap) &&
22767       TLI.getBooleanContents(CmpOpVT) ==
22768           TargetLowering::ZeroOrOneBooleanContent &&
22769       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22770
22771     if (Swap) {
22772       CC = ISD::getSetCCInverse(CC, CmpOpVT);
22773       std::swap(N2C, N3C);
22774     }
22775
22776     // If the caller doesn't want us to simplify this into a zext of a compare,
22777     // don't do it.
22778     if (NotExtCompare && N2C->isOne())
22779       return SDValue();
22780
22781     SDValue Temp, SCC;
22782     // zext (setcc n0, n1)
22783     if (LegalTypes) {
22784       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22785       if (VT.bitsLT(SCC.getValueType()))
22786         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22787       else
22788         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22789     } else {
22790       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22791       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22792     }
22793
22794     AddToWorklist(SCC.getNode());
22795     AddToWorklist(Temp.getNode());
22796
22797     if (N2C->isOne())
22798       return Temp;
22799
22800     unsigned ShCt = N2C->getAPIntValue().logBase2();
22801     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22802       return SDValue();
22803
22804     // shl setcc result by log2 n2c
22805     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22806                        DAG.getConstant(ShCt, SDLoc(Temp),
22807                                        getShiftAmountTy(Temp.getValueType())));
22808   }
22809
22810   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22811   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22812   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22813   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22814   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22815   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22816   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22817   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22818   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22819     SDValue ValueOnZero = N2;
22820     SDValue Count = N3;
22821     // If the condition is NE instead of E, swap the operands.
22822     if (CC == ISD::SETNE)
22823       std::swap(ValueOnZero, Count);
22824     // Check if the value on zero is a constant equal to the bits in the type.
22825     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22826       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22827         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22828         // legal, combine to just cttz.
22829         if ((Count.getOpcode() == ISD::CTTZ ||
22830              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22831             N0 == Count.getOperand(0) &&
22832             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22833           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22834         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22835         // legal, combine to just ctlz.
22836         if ((Count.getOpcode() == ISD::CTLZ ||
22837              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22838             N0 == Count.getOperand(0) &&
22839             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22840           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22841       }
22842     }
22843   }
22844
22845   return SDValue();
22846 }
22847
22848 /// This is a stub for TargetLowering::SimplifySetCC.
22849 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22850                                    ISD::CondCode Cond, const SDLoc &DL,
22851                                    bool foldBooleans) {
22852   TargetLowering::DAGCombinerInfo
22853     DagCombineInfo(DAG, Level, false, this);
22854   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22855 }
22856
22857 /// Given an ISD::SDIV node expressing a divide by constant, return
22858 /// a DAG expression to select that will generate the same value by multiplying
22859 /// by a magic number.
22860 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22861 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
22862   // when optimising for minimum size, we don't want to expand a div to a mul
22863   // and a shift.
22864   if (DAG.getMachineFunction().getFunction().hasMinSize())
22865     return SDValue();
22866
22867   SmallVector<SDNode *, 8> Built;
22868   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22869     for (SDNode *N : Built)
22870       AddToWorklist(N);
22871     return S;
22872   }
22873
22874   return SDValue();
22875 }
22876
22877 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22878 /// DAG expression that will generate the same value by right shifting.
22879 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22880   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22881   if (!C)
22882     return SDValue();
22883
22884   // Avoid division by zero.
22885   if (C->isNullValue())
22886     return SDValue();
22887
22888   SmallVector<SDNode *, 8> Built;
22889   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22890     for (SDNode *N : Built)
22891       AddToWorklist(N);
22892     return S;
22893   }
22894
22895   return SDValue();
22896 }
22897
22898 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22899 /// expression that will generate the same value by multiplying by a magic
22900 /// number.
22901 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22902 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
22903   // when optimising for minimum size, we don't want to expand a div to a mul
22904   // and a shift.
22905   if (DAG.getMachineFunction().getFunction().hasMinSize())
22906     return SDValue();
22907
22908   SmallVector<SDNode *, 8> Built;
22909   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22910     for (SDNode *N : Built)
22911       AddToWorklist(N);
22912     return S;
22913   }
22914
22915   return SDValue();
22916 }
22917
22918 /// Determines the LogBase2 value for a non-null input value using the
22919 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
22920 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22921   EVT VT = V.getValueType();
22922   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22923   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22924   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22925   return LogBase2;
22926 }
22927
22928 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22929 /// For the reciprocal, we need to find the zero of the function:
22930 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
22931 ///     =>
22932 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22933 ///     does not require additional intermediate precision]
22934 /// For the last iteration, put numerator N into it to gain more precision:
22935 ///   Result = N X_i + X_i (N - N A X_i)
22936 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22937                                       SDNodeFlags Flags) {
22938   if (LegalDAG)
22939     return SDValue();
22940
22941   // TODO: Handle half and/or extended types?
22942   EVT VT = Op.getValueType();
22943   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22944     return SDValue();
22945
22946   // If estimates are explicitly disabled for this function, we're done.
22947   MachineFunction &MF = DAG.getMachineFunction();
22948   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22949   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22950     return SDValue();
22951
22952   // Estimates may be explicitly enabled for this type with a custom number of
22953   // refinement steps.
22954   int Iterations = TLI.getDivRefinementSteps(VT, MF);
22955   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22956     AddToWorklist(Est.getNode());
22957
22958     SDLoc DL(Op);
22959     if (Iterations) {
22960       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22961
22962       // Newton iterations: Est = Est + Est (N - Arg * Est)
22963       // If this is the last iteration, also multiply by the numerator.
22964       for (int i = 0; i < Iterations; ++i) {
22965         SDValue MulEst = Est;
22966
22967         if (i == Iterations - 1) {
22968           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22969           AddToWorklist(MulEst.getNode());
22970         }
22971
22972         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22973         AddToWorklist(NewEst.getNode());
22974
22975         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22976                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22977         AddToWorklist(NewEst.getNode());
22978
22979         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22980         AddToWorklist(NewEst.getNode());
22981
22982         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22983         AddToWorklist(Est.getNode());
22984       }
22985     } else {
22986       // If no iterations are available, multiply with N.
22987       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22988       AddToWorklist(Est.getNode());
22989     }
22990
22991     return Est;
22992   }
22993
22994   return SDValue();
22995 }
22996
22997 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22998 /// For the reciprocal sqrt, we need to find the zero of the function:
22999 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23000 ///     =>
23001 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23002 /// As a result, we precompute A/2 prior to the iteration loop.
23003 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23004                                          unsigned Iterations,
23005                                          SDNodeFlags Flags, bool Reciprocal) {
23006   EVT VT = Arg.getValueType();
23007   SDLoc DL(Arg);
23008   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23009
23010   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23011   // this entire sequence requires only one FP constant.
23012   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23013   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23014
23015   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23016   for (unsigned i = 0; i < Iterations; ++i) {
23017     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23018     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23019     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23020     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23021   }
23022
23023   // If non-reciprocal square root is requested, multiply the result by Arg.
23024   if (!Reciprocal)
23025     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23026
23027   return Est;
23028 }
23029
23030 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23031 /// For the reciprocal sqrt, we need to find the zero of the function:
23032 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23033 ///     =>
23034 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
23035 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23036                                          unsigned Iterations,
23037                                          SDNodeFlags Flags, bool Reciprocal) {
23038   EVT VT = Arg.getValueType();
23039   SDLoc DL(Arg);
23040   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23041   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23042
23043   // This routine must enter the loop below to work correctly
23044   // when (Reciprocal == false).
23045   assert(Iterations > 0);
23046
23047   // Newton iterations for reciprocal square root:
23048   // E = (E * -0.5) * ((A * E) * E + -3.0)
23049   for (unsigned i = 0; i < Iterations; ++i) {
23050     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23051     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23052     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23053
23054     // When calculating a square root at the last iteration build:
23055     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
23056     // (notice a common subexpression)
23057     SDValue LHS;
23058     if (Reciprocal || (i + 1) < Iterations) {
23059       // RSQRT: LHS = (E * -0.5)
23060       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23061     } else {
23062       // SQRT: LHS = (A * E) * -0.5
23063       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23064     }
23065
23066     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23067   }
23068
23069   return Est;
23070 }
23071
23072 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23073 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23074 /// Op can be zero.
23075 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23076                                            bool Reciprocal) {
23077   if (LegalDAG)
23078     return SDValue();
23079
23080   // TODO: Handle half and/or extended types?
23081   EVT VT = Op.getValueType();
23082   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
23083     return SDValue();
23084
23085   // If estimates are explicitly disabled for this function, we're done.
23086   MachineFunction &MF = DAG.getMachineFunction();
23087   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23088   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23089     return SDValue();
23090
23091   // Estimates may be explicitly enabled for this type with a custom number of
23092   // refinement steps.
23093   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23094
23095   bool UseOneConstNR = false;
23096   if (SDValue Est =
23097       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23098                           Reciprocal)) {
23099     AddToWorklist(Est.getNode());
23100
23101     if (Iterations)
23102       Est = UseOneConstNR
23103             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
23104             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
23105     if (!Reciprocal) {
23106       SDLoc DL(Op);
23107       // Try the target specific test first.
23108       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
23109
23110       // The estimate is now completely wrong if the input was exactly 0.0 or
23111       // possibly a denormal. Force the answer to 0.0 or value provided by
23112       // target for those cases.
23113       Est = DAG.getNode(
23114           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
23115           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
23116     }
23117     return Est;
23118   }
23119
23120   return SDValue();
23121 }
23122
23123 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23124   return buildSqrtEstimateImpl(Op, Flags, true);
23125 }
23126
23127 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
23128   return buildSqrtEstimateImpl(Op, Flags, false);
23129 }
23130
23131 /// Return true if there is any possibility that the two addresses overlap.
23132 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
23133
23134   struct MemUseCharacteristics {
23135     bool IsVolatile;
23136     bool IsAtomic;
23137     SDValue BasePtr;
23138     int64_t Offset;
23139     Optional<int64_t> NumBytes;
23140     MachineMemOperand *MMO;
23141   };
23142
23143   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
23144     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
23145       int64_t Offset = 0;
23146       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
23147         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
23148                      ? C->getSExtValue()
23149                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
23150                            ? -1 * C->getSExtValue()
23151                            : 0;
23152       uint64_t Size =
23153           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
23154       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
23155               Offset /*base offset*/,
23156               Optional<int64_t>(Size),
23157               LSN->getMemOperand()};
23158     }
23159     if (const auto *LN = cast<LifetimeSDNode>(N))
23160       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
23161               (LN->hasOffset()) ? LN->getOffset() : 0,
23162               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
23163                                 : Optional<int64_t>(),
23164               (MachineMemOperand *)nullptr};
23165     // Default.
23166     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
23167             (int64_t)0 /*offset*/,
23168             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
23169   };
23170
23171   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
23172                         MUC1 = getCharacteristics(Op1);
23173
23174   // If they are to the same address, then they must be aliases.
23175   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
23176       MUC0.Offset == MUC1.Offset)
23177     return true;
23178
23179   // If they are both volatile then they cannot be reordered.
23180   if (MUC0.IsVolatile && MUC1.IsVolatile)
23181     return true;
23182
23183   // Be conservative about atomics for the moment
23184   // TODO: This is way overconservative for unordered atomics (see D66309)
23185   if (MUC0.IsAtomic && MUC1.IsAtomic)
23186     return true;
23187
23188   if (MUC0.MMO && MUC1.MMO) {
23189     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23190         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23191       return false;
23192   }
23193
23194   // Try to prove that there is aliasing, or that there is no aliasing. Either
23195   // way, we can return now. If nothing can be proved, proceed with more tests.
23196   bool IsAlias;
23197   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
23198                                        DAG, IsAlias))
23199     return IsAlias;
23200
23201   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
23202   // either are not known.
23203   if (!MUC0.MMO || !MUC1.MMO)
23204     return true;
23205
23206   // If one operation reads from invariant memory, and the other may store, they
23207   // cannot alias. These should really be checking the equivalent of mayWrite,
23208   // but it only matters for memory nodes other than load /store.
23209   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
23210       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
23211     return false;
23212
23213   // If we know required SrcValue1 and SrcValue2 have relatively large
23214   // alignment compared to the size and offset of the access, we may be able
23215   // to prove they do not alias. This check is conservative for now to catch
23216   // cases created by splitting vector types, it only works when the offsets are
23217   // multiples of the size of the data.
23218   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
23219   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
23220   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
23221   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
23222   auto &Size0 = MUC0.NumBytes;
23223   auto &Size1 = MUC1.NumBytes;
23224   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
23225       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
23226       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
23227       SrcValOffset1 % *Size1 == 0) {
23228     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
23229     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
23230
23231     // There is no overlap between these relatively aligned accesses of
23232     // similar size. Return no alias.
23233     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
23234       return false;
23235   }
23236
23237   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
23238                    ? CombinerGlobalAA
23239                    : DAG.getSubtarget().useAA();
23240 #ifndef NDEBUG
23241   if (CombinerAAOnlyFunc.getNumOccurrences() &&
23242       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
23243     UseAA = false;
23244 #endif
23245
23246   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
23247       Size0.hasValue() && Size1.hasValue()) {
23248     // Use alias analysis information.
23249     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
23250     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
23251     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
23252     if (AA->isNoAlias(
23253             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
23254                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
23255             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
23256                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
23257       return false;
23258   }
23259
23260   // Otherwise we have to assume they alias.
23261   return true;
23262 }
23263
23264 /// Walk up chain skipping non-aliasing memory nodes,
23265 /// looking for aliasing nodes and adding them to the Aliases vector.
23266 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
23267                                    SmallVectorImpl<SDValue> &Aliases) {
23268   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
23269   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
23270
23271   // Get alias information for node.
23272   // TODO: relax aliasing for unordered atomics (see D66309)
23273   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
23274
23275   // Starting off.
23276   Chains.push_back(OriginalChain);
23277   unsigned Depth = 0;
23278
23279   // Attempt to improve chain by a single step
23280   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
23281     switch (C.getOpcode()) {
23282     case ISD::EntryToken:
23283       // No need to mark EntryToken.
23284       C = SDValue();
23285       return true;
23286     case ISD::LOAD:
23287     case ISD::STORE: {
23288       // Get alias information for C.
23289       // TODO: Relax aliasing for unordered atomics (see D66309)
23290       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
23291                       cast<LSBaseSDNode>(C.getNode())->isSimple();
23292       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
23293         // Look further up the chain.
23294         C = C.getOperand(0);
23295         return true;
23296       }
23297       // Alias, so stop here.
23298       return false;
23299     }
23300
23301     case ISD::CopyFromReg:
23302       // Always forward past past CopyFromReg.
23303       C = C.getOperand(0);
23304       return true;
23305
23306     case ISD::LIFETIME_START:
23307     case ISD::LIFETIME_END: {
23308       // We can forward past any lifetime start/end that can be proven not to
23309       // alias the memory access.
23310       if (!isAlias(N, C.getNode())) {
23311         // Look further up the chain.
23312         C = C.getOperand(0);
23313         return true;
23314       }
23315       return false;
23316     }
23317     default:
23318       return false;
23319     }
23320   };
23321
23322   // Look at each chain and determine if it is an alias.  If so, add it to the
23323   // aliases list.  If not, then continue up the chain looking for the next
23324   // candidate.
23325   while (!Chains.empty()) {
23326     SDValue Chain = Chains.pop_back_val();
23327
23328     // Don't bother if we've seen Chain before.
23329     if (!Visited.insert(Chain.getNode()).second)
23330       continue;
23331
23332     // For TokenFactor nodes, look at each operand and only continue up the
23333     // chain until we reach the depth limit.
23334     //
23335     // FIXME: The depth check could be made to return the last non-aliasing
23336     // chain we found before we hit a tokenfactor rather than the original
23337     // chain.
23338     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
23339       Aliases.clear();
23340       Aliases.push_back(OriginalChain);
23341       return;
23342     }
23343
23344     if (Chain.getOpcode() == ISD::TokenFactor) {
23345       // We have to check each of the operands of the token factor for "small"
23346       // token factors, so we queue them up.  Adding the operands to the queue
23347       // (stack) in reverse order maintains the original order and increases the
23348       // likelihood that getNode will find a matching token factor (CSE.)
23349       if (Chain.getNumOperands() > 16) {
23350         Aliases.push_back(Chain);
23351         continue;
23352       }
23353       for (unsigned n = Chain.getNumOperands(); n;)
23354         Chains.push_back(Chain.getOperand(--n));
23355       ++Depth;
23356       continue;
23357     }
23358     // Everything else
23359     if (ImproveChain(Chain)) {
23360       // Updated Chain Found, Consider new chain if one exists.
23361       if (Chain.getNode())
23362         Chains.push_back(Chain);
23363       ++Depth;
23364       continue;
23365     }
23366     // No Improved Chain Possible, treat as Alias.
23367     Aliases.push_back(Chain);
23368   }
23369 }
23370
23371 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
23372 /// (aliasing node.)
23373 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
23374   if (OptLevel == CodeGenOpt::None)
23375     return OldChain;
23376
23377   // Ops for replacing token factor.
23378   SmallVector<SDValue, 8> Aliases;
23379
23380   // Accumulate all the aliases to this node.
23381   GatherAllAliases(N, OldChain, Aliases);
23382
23383   // If no operands then chain to entry token.
23384   if (Aliases.size() == 0)
23385     return DAG.getEntryNode();
23386
23387   // If a single operand then chain to it.  We don't need to revisit it.
23388   if (Aliases.size() == 1)
23389     return Aliases[0];
23390
23391   // Construct a custom tailored token factor.
23392   return DAG.getTokenFactor(SDLoc(N), Aliases);
23393 }
23394
namespace {
// TODO: Replace with std::monostate when we move to C++17.
// Empty value type with a single instance; all UnitT values compare equal.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
23401
23402 // This function tries to collect a bunch of potentially interesting
23403 // nodes to improve the chains of, all at once. This might seem
23404 // redundant, as this function gets called when visiting every store
23405 // node, so why not let the work be done on each store as it's visited?
23406 //
23407 // I believe this is mainly important because mergeConsecutiveStores
23408 // is unable to deal with merging stores of different sizes, so unless
23409 // we improve the chains of all the potential candidates up-front
23410 // before running mergeConsecutiveStores, it might only see some of
23411 // the nodes that will eventually be candidates, and then not be able
23412 // to go from a partially-merged state to the desired final
23413 // fully-merged state.
23414
23415 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23416   SmallVector<StoreSDNode *, 8> ChainedStores;
23417   StoreSDNode *STChain = St;
23418   // Intervals records which offsets from BaseIndex have been covered. In
23419   // the common case, every store writes to the immediately previous address
23420   // space and thus merged with the previous interval at insertion time.
23421
23422   using IMap =
23423       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23424   IMap::Allocator A;
23425   IMap Intervals(A);
23426
23427   // This holds the base pointer, index, and the offset in bytes from the base
23428   // pointer.
23429   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23430
23431   // We must have a base and an offset.
23432   if (!BasePtr.getBase().getNode())
23433     return false;
23434
23435   // Do not handle stores to undef base pointers.
23436   if (BasePtr.getBase().isUndef())
23437     return false;
23438
23439   // Do not handle stores to opaque types
23440   if (St->getMemoryVT().isZeroSized())
23441     return false;
23442
23443   // BaseIndexOffset assumes that offsets are fixed-size, which
23444   // is not valid for scalable vectors where the offsets are
23445   // scaled by `vscale`, so bail out early.
23446   if (St->getMemoryVT().isScalableVector())
23447     return false;
23448
23449   // Add ST's interval.
23450   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
23451
23452   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23453     if (Chain->getMemoryVT().isScalableVector())
23454       return false;
23455
23456     // If the chain has more than one use, then we can't reorder the mem ops.
23457     if (!SDValue(Chain, 0)->hasOneUse())
23458       break;
23459     // TODO: Relax for unordered atomics (see D66309)
23460     if (!Chain->isSimple() || Chain->isIndexed())
23461       break;
23462
23463     // Find the base pointer and offset for this memory node.
23464     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23465     // Check that the base pointer is the same as the original one.
23466     int64_t Offset;
23467     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23468       break;
23469     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23470     // Make sure we don't overlap with other intervals by checking the ones to
23471     // the left or right before inserting.
23472     auto I = Intervals.find(Offset);
23473     // If there's a next interval, we should end before it.
23474     if (I != Intervals.end() && I.start() < (Offset + Length))
23475       break;
23476     // If there's a previous interval, we should start after it.
23477     if (I != Intervals.begin() && (--I).stop() <= Offset)
23478       break;
23479     Intervals.insert(Offset, Offset + Length, Unit);
23480
23481     ChainedStores.push_back(Chain);
23482     STChain = Chain;
23483   }
23484
23485   // If we didn't find a chained store, exit.
23486   if (ChainedStores.size() == 0)
23487     return false;
23488
23489   // Improve all chained stores (St and ChainedStores members) starting from
23490   // where the store chain ended and return single TokenFactor.
23491   SDValue NewChain = STChain->getChain();
23492   SmallVector<SDValue, 8> TFOps;
23493   for (unsigned I = ChainedStores.size(); I;) {
23494     StoreSDNode *S = ChainedStores[--I];
23495     SDValue BetterChain = FindBetterChain(S, NewChain);
23496     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23497         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23498     TFOps.push_back(SDValue(S, 0));
23499     ChainedStores[I] = S;
23500   }
23501
23502   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23503   SDValue BetterChain = FindBetterChain(St, NewChain);
23504   SDValue NewST;
23505   if (St->isTruncatingStore())
23506     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23507                               St->getBasePtr(), St->getMemoryVT(),
23508                               St->getMemOperand());
23509   else
23510     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23511                          St->getBasePtr(), St->getMemOperand());
23512
23513   TFOps.push_back(NewST);
23514
23515   // If we improved every element of TFOps, then we've lost the dependence on
23516   // NewChain to successors of St and we need to add it back to TFOps. Do so at
23517   // the beginning to keep relative order consistent with FindBetterChains.
23518   auto hasImprovedChain = [&](SDValue ST) -> bool {
23519     return ST->getOperand(0) != NewChain;
23520   };
23521   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23522   if (AddNewChain)
23523     TFOps.insert(TFOps.begin(), NewChain);
23524
23525   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23526   CombineTo(St, TF);
23527
23528   // Add TF and its operands to the worklist.
23529   AddToWorklist(TF.getNode());
23530   for (const SDValue &Op : TF->ops())
23531     AddToWorklist(Op.getNode());
23532   AddToWorklist(STChain);
23533   return true;
23534 }
23535
23536 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23537   if (OptLevel == CodeGenOpt::None)
23538     return false;
23539
23540   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23541
23542   // We must have a base and an offset.
23543   if (!BasePtr.getBase().getNode())
23544     return false;
23545
23546   // Do not handle stores to undef base pointers.
23547   if (BasePtr.getBase().isUndef())
23548     return false;
23549
23550   // Directly improve a chain of disjoint stores starting at St.
23551   if (parallelizeChainedStores(St))
23552     return true;
23553
23554   // Improve St's Chain..
23555   SDValue BetterChain = FindBetterChain(St, St->getChain());
23556   if (St->getChain() != BetterChain) {
23557     replaceStoreChain(St, BetterChain);
23558     return true;
23559   }
23560   return false;
23561 }
23562
23563 /// This is the entry point for the file.
23564 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23565                            CodeGenOpt::Level OptLevel) {
23566   /// This is the main entry point to this class.
23567   DAGCombiner(*this, AA, OptLevel).Run(Level);
23568 }